diff --git a/.github/workflows/file-issue-for-review.yml b/.github/workflows/file-issue-for-review.yml
index b04dfb41..0a4d5ea7 100644
--- a/.github/workflows/file-issue-for-review.yml
+++ b/.github/workflows/file-issue-for-review.yml
@@ -26,12 +26,15 @@ jobs:
run: |
git config user.name "strudy-bot"
git config user.email "<>"
- git remote set-url --push origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
+ git remote set-url --push origin https://x-access-token:${{ secrets.ISSUE_REPORT_GH_TOKEN }}@github.com/$GITHUB_REPOSITORY
working-directory: strudy
+ - name: Run Strudy to detect new anomalies
+ working-directory: strudy
+ run: node strudy.js inspect ../webref --issues issues --what brokenLinks discontinuedReferences --update-mode old
- name: Run issue filer script
working-directory: strudy
- run: node src/reporting/file-issue-for-review.js ../webref/ed/ ../webref/tr/ brokenLinks
+ run: node src/reporting/file-issue-for-review.js --max 10
env:
- GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ GITHUB_TOKEN: ${{ secrets.ISSUE_REPORT_GH_TOKEN }}
diff --git a/index.js b/index.js
index 42252811..6e550f67 100644
--- a/index.js
+++ b/index.js
@@ -1,8 +1,6 @@
-import studyCrawl from './src/lib/study-crawl.js';
-import studyWebIdl from './src/lib/study-webidl.js';
-import generateReport from './src/lib/generate-report.js';
+import study from './src/lib/study.js';
-export { studyCrawl, studyWebIdl, generateReport };
+export { study };
-const strudy = { studyCrawl, studyWebIdl, generateReport };
+const strudy = { study };
export default strudy;
diff --git a/issues/DOM-Parsing-brokenlinks.md b/issues/DOM-Parsing-brokenlinks.md
index 7557c48e..b3bc5762 100644
--- a/issues/DOM-Parsing-brokenlinks.md
+++ b/issues/DOM-Parsing-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/DOM-Parsing/issues/74'
Title: Broken references in DOM Parsing and Serialization
---
-While crawling [DOM Parsing and Serialization](https://w3c.github.io/DOM-Parsing/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [DOM Parsing and Serialization](https://w3c.github.io/DOM-Parsing/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://www.w3.org/TR/dom/#case-sensitive
* [ ] https://www.w3.org/TR/dom/#ascii-case-insensitive
* [ ] https://www.w3.org/TR/dom/#domexception
diff --git a/issues/FileAPI-brokenlinks.md b/issues/FileAPI-brokenlinks.md
index 5ce77ed3..90390d58 100644
--- a/issues/FileAPI-brokenlinks.md
+++ b/issues/FileAPI-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/FileAPI/issues/185'
Title: Broken references in File API
---
-While crawling [File API](https://w3c.github.io/FileAPI/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [File API](https://w3c.github.io/FileAPI/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://mimesniff.spec.whatwg.org/#parsable-mime-type
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/background-fetch-brokenlinks.md b/issues/background-fetch-brokenlinks.md
index 84c33a46..c244682c 100644
--- a/issues/background-fetch-brokenlinks.md
+++ b/issues/background-fetch-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/background-fetch/issues/167'
Title: Broken references in Background Fetch
---
-While crawling [Background Fetch](https://wicg.github.io/background-fetch/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Background Fetch](https://wicg.github.io/background-fetch/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://fetch.spec.whatwg.org/#concept-fetch-terminate
* [ ] https://w3c.github.io/permissions/#permission-state
* [ ] https://w3c.github.io/permissions/#dictdef-permissiondescriptor
diff --git a/issues/background-sync-brokenlinks.md b/issues/background-sync-brokenlinks.md
index 283a238a..df84bf1a 100644
--- a/issues/background-sync-brokenlinks.md
+++ b/issues/background-sync-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/background-sync/issues/186'
Title: Broken references in Web Background Synchronization
---
-While crawling [Web Background Synchronization](https://wicg.github.io/background-sync/spec/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Web Background Synchronization](https://wicg.github.io/background-sync/spec/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://notifications.spec.whatwg.org/#permission-model
* [ ] https://slightlyoff.github.io/ServiceWorker/spec/service_worker/index.html#service-worker-registration-interface
* [ ] https://slightlyoff.github.io/ServiceWorker/spec/service_worker/index.html#service-worker-global-scope-interface
diff --git a/issues/change-password-url-discontinuedreferences.md b/issues/change-password-url-discontinuedreferences.md
index 39a51ce2..cf63ce19 100644
--- a/issues/change-password-url-discontinuedreferences.md
+++ b/issues/change-password-url-discontinuedreferences.md
@@ -6,7 +6,7 @@ Title: >-
Passwords
---
-While crawling [A Well-Known URL for Changing Passwords](https://w3c.github.io/webappsec-change-password-url/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [A Well-Known URL for Changing Passwords](https://w3c.github.io/webappsec-change-password-url/), the following normative references were detected as pointing to discontinued specifications:
* [ ] [HTTP-SEMANTICS](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html)
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/clear-site-data-discontinuedreferences.md b/issues/clear-site-data-discontinuedreferences.md
index c9bceea1..1f9f1b2c 100644
--- a/issues/clear-site-data-discontinuedreferences.md
+++ b/issues/clear-site-data-discontinuedreferences.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webappsec-clear-site-data/issues/79'
Title: Normative references to discontinued specs in Clear Site Data
---
-While crawling [Clear Site Data](https://w3c.github.io/webappsec-clear-site-data/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [Clear Site Data](https://w3c.github.io/webappsec-clear-site-data/), the following normative references were detected as pointing to discontinued specifications:
* [ ] [RFC7230](https://httpwg.org/specs/rfc7230.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html),[rfc9112](https://httpwg.org/specs/rfc9112.html)
* [ ] [RFC7234](https://httpwg.org/specs/rfc7234.html) has been obsoleted by [rfc9111](https://httpwg.org/specs/rfc9111.html)
diff --git a/issues/clipboard-apis-brokenlinks.md b/issues/clipboard-apis-brokenlinks.md
index 450da512..bd879dd4 100644
--- a/issues/clipboard-apis-brokenlinks.md
+++ b/issues/clipboard-apis-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/clipboard-apis/issues/187'
Title: Broken references in Clipboard API and events
---
-While crawling [Clipboard API and events](https://w3c.github.io/clipboard-apis/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Clipboard API and events](https://w3c.github.io/clipboard-apis/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://html.spec.whatwg.org/multipage/interaction.html#drag-data-item-kind
* [ ] https://html.spec.whatwg.org/multipage/interaction.html#drag-data-item-type-string
diff --git a/issues/content-index-brokenlinks.md b/issues/content-index-brokenlinks.md
index 60d22d24..1e646ec7 100644
--- a/issues/content-index-brokenlinks.md
+++ b/issues/content-index-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/content-index/issues/33'
Title: Broken references in Content Index
---
-While crawling [Content Index](https://wicg.github.io/content-index/spec/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Content Index](https://wicg.github.io/content-index/spec/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://dom.spec.whatwg.org/#context-object
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/csp-embedded-enforcement-brokenlinks.md b/issues/csp-embedded-enforcement-brokenlinks.md
index 5a7e8925..4c564240 100644
--- a/issues/csp-embedded-enforcement-brokenlinks.md
+++ b/issues/csp-embedded-enforcement-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webappsec-cspee/issues/27'
Title: 'Broken references in Content Security Policy: Embedded Enforcement'
---
-While crawling [Content Security Policy: Embedded Enforcement](https://w3c.github.io/webappsec-cspee/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Content Security Policy: Embedded Enforcement](https://w3c.github.io/webappsec-cspee/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://fetch.spec.whatwg.org/#concept-response-csp-list
* [ ] https://w3c.github.io/webappsec-csp/#port-part-match
diff --git a/issues/css-line-grid-1-brokenlinks.md b/issues/css-line-grid-1-brokenlinks.md
index fbeb4e2d..c610c59f 100644
--- a/issues/css-line-grid-1-brokenlinks.md
+++ b/issues/css-line-grid-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/csswg-drafts/issues/8080'
Title: '[css-line-grid] Broken references in CSS Line Grid Module Level 1'
---
-While crawling [CSS Line Grid Module Level 1](https://drafts.csswg.org/css-line-grid/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [CSS Line Grid Module Level 1](https://drafts.csswg.org/css-line-grid/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://drafts.csswg.org/css-inline/#central
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/css-nav-1-brokenlinks.md b/issues/css-nav-1-brokenlinks.md
index e62e0937..5cd163e7 100644
--- a/issues/css-nav-1-brokenlinks.md
+++ b/issues/css-nav-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/csswg-drafts/issues/8081'
Title: '[css-nav-1] Broken references in CSS Spatial Navigation Level 1'
---
-While crawling [CSS Spatial Navigation Level 1](https://drafts.csswg.org/css-nav-1/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [CSS Spatial Navigation Level 1](https://drafts.csswg.org/css-nav-1/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://drafts.csswg.org/css2/box.html#x14
* [ ] https://html.spec.whatwg.org/multipage/infrastructure.html#nodes-are-removed
* [ ] https://html.spec.whatwg.org/multipage/interaction.html#expressly-inert
diff --git a/issues/filter-effects-1-brokenlinks.md b/issues/filter-effects-1-brokenlinks.md
index 2a059ffb..414512f1 100644
--- a/issues/filter-effects-1-brokenlinks.md
+++ b/issues/filter-effects-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/fxtf-drafts/issues/482'
Title: '[filter-effects-1] Broken references in Filter Effects Module Level 1'
---
-While crawling [Filter Effects Module Level 1](https://drafts.fxtf.org/filter-effects-1/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Filter Effects Module Level 1](https://drafts.fxtf.org/filter-effects-1/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://drafts.csswg.org/css-transitions/#animtype-length
* [ ] https://drafts.csswg.org/css-transitions/#animtype-number
* [ ] https://drafts.csswg.org/css-transitions/#animtype-shadow-list
diff --git a/issues/get-installed-related-apps-brokenlinks.md b/issues/get-installed-related-apps-brokenlinks.md
index 766fed98..402045af 100644
--- a/issues/get-installed-related-apps-brokenlinks.md
+++ b/issues/get-installed-related-apps-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/get-installed-related-apps/issues/35'
Title: Broken references in Get Installed Related Apps API
---
-While crawling [Get Installed Related Apps API](https://wicg.github.io/get-installed-related-apps/spec/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Get Installed Related Apps API](https://wicg.github.io/get-installed-related-apps/spec/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://dom.spec.whatwg.org/#context-object
* [ ] https://www.w3.org/TR/appmanifest/#dom-fingerprint
* [ ] https://www.w3.org/TR/appmanifest/#dom-externalapplicationresource
diff --git a/issues/html-aam-1.0-brokenlinks.md b/issues/html-aam-1.0-brokenlinks.md
index 6dee9b94..212eaaf1 100644
--- a/issues/html-aam-1.0-brokenlinks.md
+++ b/issues/html-aam-1.0-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/html-aam/issues/447'
Title: Broken references in HTML Accessibility API Mappings 1.0
---
-While crawling [HTML Accessibility API Mappings 1.0](https://w3c.github.io/html-aam/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [HTML Accessibility API Mappings 1.0](https://w3c.github.io/html-aam/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://html.spec.whatwg.org/multipage/iframe-embed-object.html#attr-param-name
* [ ] https://html.spec.whatwg.org/multipage/microdata.html#attr-itemprop
* [ ] https://html.spec.whatwg.org/multipage/sections.html#sectioning-root
diff --git a/issues/html-discontinuedreferences.md b/issues/html-discontinuedreferences.md
index 26c2131b..17b4a9f4 100644
--- a/issues/html-discontinuedreferences.md
+++ b/issues/html-discontinuedreferences.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/whatwg/html/issues/9981'
Title: Normative references to discontinued specs in HTML Standard
---
-While crawling [HTML Standard](https://html.spec.whatwg.org/multipage/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [HTML Standard](https://html.spec.whatwg.org/multipage/), the following normative references were detected as pointing to discontinued specifications:
* [ ] [HTTP](https://httpwg.org/specs/rfc7230.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html),[rfc9112](https://httpwg.org/specs/rfc9112.html)
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/intersection-observer-brokenlinks.md b/issues/intersection-observer-brokenlinks.md
index f2a55e28..ec12aee0 100644
--- a/issues/intersection-observer-brokenlinks.md
+++ b/issues/intersection-observer-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/IntersectionObserver/issues/506'
Title: Broken references in Intersection Observer
---
-While crawling [Intersection Observer](https://w3c.github.io/IntersectionObserver/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Intersection Observer](https://w3c.github.io/IntersectionObserver/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] http://www.w3.org/TR/hr-time/#domhighrestimestamp
* [ ] http://www.w3.org/TR/hr-time/#time-origin
* [ ] https://drafts.csswg.org/css-box/#containing-block
diff --git a/issues/json-ld11-discontinuedreferences.md b/issues/json-ld11-discontinuedreferences.md
index 5a00c830..cc8d9a3a 100644
--- a/issues/json-ld11-discontinuedreferences.md
+++ b/issues/json-ld11-discontinuedreferences.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/json-ld-syntax/issues/423'
Title: Normative references to discontinued specs in JSON-LD 1.1
---
-While crawling [JSON-LD 1.1](https://w3c.github.io/json-ld-syntax/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [JSON-LD 1.1](https://w3c.github.io/json-ld-syntax/), the following normative references were detected as pointing to discontinued specifications:
* [ ] [RFC7231](https://tools.ietf.org/html/rfc7231) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html)
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/keyboard-lock-brokenlinks.md b/issues/keyboard-lock-brokenlinks.md
index ec76a0d7..aa6a0b4a 100644
--- a/issues/keyboard-lock-brokenlinks.md
+++ b/issues/keyboard-lock-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/keyboard-lock/issues/68'
Title: Broken references in Keyboard Lock
---
-While crawling [Keyboard Lock](https://wicg.github.io/keyboard-lock/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Keyboard Lock](https://wicg.github.io/keyboard-lock/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] http://www.w3.org/TR/uievents-code/#code-keyw
* [ ] http://www.w3.org/TR/uievents-code/#code-keya
* [ ] http://www.w3.org/TR/uievents-code/#code-keys
diff --git a/issues/keyboard-map-brokenlinks.md b/issues/keyboard-map-brokenlinks.md
index a5e96f45..b2af1f0d 100644
--- a/issues/keyboard-map-brokenlinks.md
+++ b/issues/keyboard-map-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/keyboard-map/issues/43'
Title: Broken references in Keyboard Map
---
-While crawling [Keyboard Map](https://wicg.github.io/keyboard-map/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Keyboard Map](https://wicg.github.io/keyboard-map/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] http://www.w3.org/TR/uievents-code/#code-quote
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/layout-instability-brokenlinks.md b/issues/layout-instability-brokenlinks.md
index 8ce34650..42c1ebc4 100644
--- a/issues/layout-instability-brokenlinks.md
+++ b/issues/layout-instability-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/layout-instability/issues/116'
Title: Broken references in Layout Instability API
---
-While crawling [Layout Instability API](https://wicg.github.io/layout-instability/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Layout Instability API](https://wicg.github.io/layout-instability/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://w3c.github.io/resource-timing/#sec-privacy-security
* [ ] https://www.w3.org/TR/css-values-4/#pixel-unit
diff --git a/issues/media-feeds-discontinuedreferences.md b/issues/media-feeds-discontinuedreferences.md
index b2d7b9b3..fff2bf50 100644
--- a/issues/media-feeds-discontinuedreferences.md
+++ b/issues/media-feeds-discontinuedreferences.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/media-feeds/issues/60'
Title: Normative references to discontinued specs in Media Feeds
---
-While crawling [Media Feeds](https://wicg.github.io/media-feeds/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [Media Feeds](https://wicg.github.io/media-feeds/), the following normative references were detected as pointing to discontinued specifications:
* [ ] [rfc7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html)
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/nav-tracking-mitigations-discontinuedreferences.md b/issues/nav-tracking-mitigations-discontinuedreferences.md
index 3e4583fd..3380c148 100644
--- a/issues/nav-tracking-mitigations-discontinuedreferences.md
+++ b/issues/nav-tracking-mitigations-discontinuedreferences.md
@@ -6,7 +6,7 @@ Title: >-
Mitigations
---
-While crawling [Navigational-Tracking Mitigations](https://privacycg.github.io/nav-tracking-mitigations/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [Navigational-Tracking Mitigations](https://privacycg.github.io/nav-tracking-mitigations/), the following normative references were detected as pointing to discontinued specifications:
* [ ] [RFC7234](https://httpwg.org/specs/rfc7234.html) has been obsoleted by [rfc9111](https://httpwg.org/specs/rfc9111.html)
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/netinfo-discontinuedreferences.md b/issues/netinfo-discontinuedreferences.md
index 0a930781..72f8df5c 100644
--- a/issues/netinfo-discontinuedreferences.md
+++ b/issues/netinfo-discontinuedreferences.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/netinfo/issues/97'
Title: Normative references to discontinued specs in Network Information API
---
-While crawling [Network Information API](https://wicg.github.io/netinfo/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [Network Information API](https://wicg.github.io/netinfo/), the following normative references were detected as pointing to discontinued specifications:
* [ ] [RFC7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html)
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/periodic-background-sync-brokenlinks.md b/issues/periodic-background-sync-brokenlinks.md
index b8010d2d..f725b6ad 100644
--- a/issues/periodic-background-sync-brokenlinks.md
+++ b/issues/periodic-background-sync-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/periodic-background-sync/issues/11'
Title: Broken references in Web Periodic Background Synchronization
---
-While crawling [Web Periodic Background Synchronization](https://wicg.github.io/periodic-background-sync/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Web Periodic Background Synchronization](https://wicg.github.io/periodic-background-sync/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://dom.spec.whatwg.org/#context-object
* [ ] https://w3c.github.io/permissions/#enumdef-permissionstate
* [ ] https://w3c.github.io/permissions/#dictdef-permissiondescriptor
diff --git a/issues/permissions-request-brokenlinks.md b/issues/permissions-request-brokenlinks.md
index 160c7a8b..a3a1c2bb 100644
--- a/issues/permissions-request-brokenlinks.md
+++ b/issues/permissions-request-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/permissions-request/issues/8'
Title: Broken references in Requesting Permissions
---
-While crawling [Requesting Permissions](https://wicg.github.io/permissions-request/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Requesting Permissions](https://wicg.github.io/permissions-request/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://w3c.github.io/permissions/#permission-registry
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/portals-brokenlinks.md b/issues/portals-brokenlinks.md
index fa539cf0..5ddbf5f4 100644
--- a/issues/portals-brokenlinks.md
+++ b/issues/portals-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/portals/issues/285'
Title: Broken references in Portals
---
-While crawling [Portals](https://wicg.github.io/portals/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Portals](https://wicg.github.io/portals/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://html.spec.whatwg.org/multipage/web-messaging.html#postmessageoptions
* [ ] https://html.spec.whatwg.org/multipage/web-messaging.html#dom-postmessageoptions-transfer
diff --git a/issues/raw-camera-access-brokenlinks.md b/issues/raw-camera-access-brokenlinks.md
index 0ce3d54d..2239d382 100644
--- a/issues/raw-camera-access-brokenlinks.md
+++ b/issues/raw-camera-access-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/raw-camera-access/issues/17'
Title: Broken references in WebXR Raw Camera Access Module
---
-While crawling [WebXR Raw Camera Access Module](https://immersive-web.github.io/raw-camera-access/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebXR Raw Camera Access Module](https://immersive-web.github.io/raw-camera-access/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://www.w3.org/TR/webxr/#set-of-granted-features
* [ ] https://www.w3.org/TR/webxr/#requestanimationframe
diff --git a/issues/reporting-1-brokenlinks.md b/issues/reporting-1-brokenlinks.md
index a98946c7..e8e8a0ca 100644
--- a/issues/reporting-1-brokenlinks.md
+++ b/issues/reporting-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/reporting/issues/261'
Title: Broken references in Reporting API
---
-While crawling [Reporting API](https://w3c.github.io/reporting/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Reporting API](https://w3c.github.io/reporting/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://fetch.spec.whatwg.org/#concept-response-https-state
* [ ] https://fetch.spec.whatwg.org/#wait-for-a-response
* [ ] https://html.spec.whatwg.org/multipage/webappapis.html#creation-url
diff --git a/issues/savedata-discontinuedreferences.md b/issues/savedata-discontinuedreferences.md
index dd02eded..263ce812 100644
--- a/issues/savedata-discontinuedreferences.md
+++ b/issues/savedata-discontinuedreferences.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/savedata/issues/13'
Title: Normative references to discontinued specs in Save Data API
---
-While crawling [Save Data API](https://wicg.github.io/savedata/), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [Save Data API](https://wicg.github.io/savedata/), the following normative references were detected as pointing to discontinued specifications:
* [ ] [RFC7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html)
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/issues/service-workers-brokenlinks.md b/issues/service-workers-brokenlinks.md
index 6198ae76..4deb97c1 100644
--- a/issues/service-workers-brokenlinks.md
+++ b/issues/service-workers-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/ServiceWorker/issues/1669'
Title: Broken references in Service Workers Nightly
---
-While crawling [Service Workers Nightly](https://w3c.github.io/ServiceWorker/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Service Workers Nightly](https://w3c.github.io/ServiceWorker/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://w3c.github.io/push-api/#h-the-push-event
* [ ] https://w3c.github.io/push-api/#dfn-fire-the-push-event
* [ ] https://wicg.github.io/BackgroundSync/spec/#sync
diff --git a/issues/svg-aam-1.0-brokenlinks.md b/issues/svg-aam-1.0-brokenlinks.md
index 9157463d..95b969f2 100644
--- a/issues/svg-aam-1.0-brokenlinks.md
+++ b/issues/svg-aam-1.0-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/svg-aam/issues/23'
Title: Broken references in SVG Accessibility API Mappings
---
-While crawling [SVG Accessibility API Mappings](https://w3c.github.io/svg-aam/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [SVG Accessibility API Mappings](https://w3c.github.io/svg-aam/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://w3c.github.io/core-aam/#keyboard-focus
* [ ] https://w3c.github.io/core-aam/#exclude_elements2
* [ ] https://w3c.github.io/core-aam/#include_elements
diff --git a/issues/upgrade-insecure-requests-brokenlinks.md b/issues/upgrade-insecure-requests-brokenlinks.md
index 20df641b..fc31c585 100644
--- a/issues/upgrade-insecure-requests-brokenlinks.md
+++ b/issues/upgrade-insecure-requests-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webappsec-upgrade-insecure-requests/issues/33'
Title: Broken references in Upgrade Insecure Requests
---
-While crawling [Upgrade Insecure Requests](https://w3c.github.io/webappsec-upgrade-insecure-requests/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Upgrade Insecure Requests](https://w3c.github.io/webappsec-upgrade-insecure-requests/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://html.spec.whatwg.org/multipage/browsers.html#create-a-document-object
* [ ] https://html.spec.whatwg.org/multipage/webappapis.html#responsible-document
* [ ] https://html.spec.whatwg.org/multipage/webappapis.html#responsible-browsing-context
diff --git a/issues/web-otp-brokenlinks.md b/issues/web-otp-brokenlinks.md
index f0640b69..d8eb7325 100644
--- a/issues/web-otp-brokenlinks.md
+++ b/issues/web-otp-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/web-otp/issues/59'
Title: Broken references in WebOTP API
---
-While crawling [WebOTP API](https://wicg.github.io/web-otp/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebOTP API](https://wicg.github.io/web-otp/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://dom.spec.whatwg.org/#abortsignal-aborted-flag
* [ ] https://w3c.github.io/webappsec-credential-management/#collectfromcredentialstore-origin-options-sameoriginwithancestors
diff --git a/issues/webpackage-discontinuedreferences.md b/issues/webpackage-discontinuedreferences.md
index f5c537b5..2ff69f0a 100644
--- a/issues/webpackage-discontinuedreferences.md
+++ b/issues/webpackage-discontinuedreferences.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/WICG/webpackage/issues/885'
Title: Normative references to discontinued specs in Loading Signed Exchanges
---
-While crawling [Loading Signed Exchanges](https://wicg.github.io/webpackage/loading.html), the following normative referenced were detected as pointing to discontinued specifications:
+While crawling [Loading Signed Exchanges](https://wicg.github.io/webpackage/loading.html), the following normative references were detected as pointing to discontinued specifications:
* [ ] [RFC7230](https://httpwg.org/specs/rfc7230.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html),[rfc9112](https://httpwg.org/specs/rfc9112.html)
* [ ] [RFC7231](https://httpwg.org/specs/rfc7231.html) has been obsoleted by [rfc9110](https://httpwg.org/specs/rfc9110.html)
diff --git a/issues/webrtc-identity-brokenlinks.md b/issues/webrtc-identity-brokenlinks.md
index e09829a9..66cd662a 100644
--- a/issues/webrtc-identity-brokenlinks.md
+++ b/issues/webrtc-identity-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/w3c/webrtc-identity/issues/40'
Title: Broken references in Identity for WebRTC 1.0
---
-While crawling [Identity for WebRTC 1.0](https://w3c.github.io/webrtc-identity/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [Identity for WebRTC 1.0](https://w3c.github.io/webrtc-identity/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://w3c.github.io/webrtc-pc/#dfn-create-an-offer
* [ ] https://w3c.github.io/webrtc-pc/#dfn-create-an-answer
* [ ] https://www.w3.org/TR/webrtc/#dom-rtcconfiguration-peeridentity
diff --git a/issues/webxr-depth-sensing-1-brokenlinks.md b/issues/webxr-depth-sensing-1-brokenlinks.md
index ada71907..56352e7b 100644
--- a/issues/webxr-depth-sensing-1-brokenlinks.md
+++ b/issues/webxr-depth-sensing-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/depth-sensing/issues/40'
Title: Broken references in WebXR Depth Sensing Module
---
-While crawling [WebXR Depth Sensing Module](https://immersive-web.github.io/depth-sensing/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebXR Depth Sensing Module](https://immersive-web.github.io/depth-sensing/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://www.w3.org/TR/webxr/#feature-policy
* [ ] https://www.w3.org/TR/webxr/#xr-device-list-of-enabled-features
diff --git a/issues/webxr-hit-test-1-brokenlinks.md b/issues/webxr-hit-test-1-brokenlinks.md
index 69aeb211..b8104e68 100644
--- a/issues/webxr-hit-test-1-brokenlinks.md
+++ b/issues/webxr-hit-test-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/hit-test/issues/114'
Title: Broken references in WebXR Hit Test Module
---
-While crawling [WebXR Hit Test Module](https://immersive-web.github.io/hit-test/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebXR Hit Test Module](https://immersive-web.github.io/hit-test/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://www.w3.org/TR/webxr/#feature-policy
* [ ] https://www.w3.org/TR/webxr/#xrsession-list-of-enabled-features
diff --git a/issues/webxr-lighting-estimation-1-brokenlinks.md b/issues/webxr-lighting-estimation-1-brokenlinks.md
index bc9f2505..4c7e129c 100644
--- a/issues/webxr-lighting-estimation-1-brokenlinks.md
+++ b/issues/webxr-lighting-estimation-1-brokenlinks.md
@@ -4,7 +4,7 @@ Tracked: 'https://github.com/immersive-web/lighting-estimation/issues/58'
Title: Broken references in WebXR Lighting Estimation API Level 1
---
-While crawling [WebXR Lighting Estimation API Level 1](https://immersive-web.github.io/lighting-estimation/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebXR Lighting Estimation API Level 1](https://immersive-web.github.io/lighting-estimation/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://www.w3.org/TR/webxr/#xrsession-list-of-enabled-features
* [ ] https://www.w3.org/TR/webxrlayers-1/#session
diff --git a/issues/webxrlayers-1-brokenlinks.md b/issues/webxrlayers-1-brokenlinks.md
index f5010de1..b08e9e4e 100644
--- a/issues/webxrlayers-1-brokenlinks.md
+++ b/issues/webxrlayers-1-brokenlinks.md
@@ -5,7 +5,7 @@ Title: Broken references in WebXR Layers API Level 1
---
-While crawling [WebXR Layers API Level 1](https://immersive-web.github.io/layers/), the following links to other specifications were detected as pointing to non-existing anchors, which should be fixed:
+While crawling [WebXR Layers API Level 1](https://immersive-web.github.io/layers/), the following links to other specifications were detected as pointing to non-existing anchors:
* [ ] https://www.w3.org/TR/webxr/#animationframe
This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).
diff --git a/package.json b/package.json
index 53a82979..e59bd6d0 100644
--- a/package.json
+++ b/package.json
@@ -41,8 +41,6 @@
"commander": "12.1.0",
"gray-matter": "^4.0.3",
"jsdom": "^24.1.1",
- "node-fetch": "^2.6.5",
- "node-pandoc": "0.3.0",
"reffy": "^17.1.1",
"semver": "^7.3.5",
"webidl2": "^24.2.2"
diff --git a/src/cli/study-algorithms.js b/src/cli/study-algorithms.js
deleted file mode 100644
index 39844990..00000000
--- a/src/cli/study-algorithms.js
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/env node
-
-import { loadCrawlResults } from '../lib/util.js';
-import studyAlgorithms from '../lib/study-algorithms.js';
-import loadJSON from '../lib/load-json.js';
-import { expandCrawlResult } from 'reffy';
-import path from 'node:path';
-
-function reportToConsole(results) {
- const toreport = [];
- for (const anomaly of results) {
- const spec = anomaly.specs[0];
- let entry = toreport.find(entry => entry.spec.shortname === spec.shortname);
- if (!entry) {
- entry = { spec, anomalies: [] };
- toreport.push(entry);
- }
- entry.anomalies.push(anomaly);
- }
- toreport.sort((entry1, entry2) => {
- return entry1.spec.title.localeCompare(entry2.spec.title);
- });
- for (const entry of toreport) {
- const spec = entry.spec;
- console.log(`- [${spec.title}](${spec.nightly?.url ?? spec.url})`);
- for (const anomaly of entry.anomalies) {
- console.log(` - ${anomaly.message}`);
- }
- }
-}
-
-async function main(crawlPath, anomalyType) {
- // Target the index file if needed
- if (!crawlPath.endsWith('index.json')) {
- crawlPath = path.join(crawlPath, 'index.json');
- }
-
- const crawl = await loadJSON(crawlPath);
- if (!crawl) {
- throw new Error("Impossible to read " + crawlPath);
- }
-
- const expanded = await expandCrawlResult(crawl, crawlPath.replace(/index\.json$/, ''), ['algorithms']);
- const report = studyAlgorithms(expanded.results);
- reportToConsole(report);
-}
-
-/**************************************************
-Main loop
-**************************************************/
-const crawlPath = process.argv[2];
-if (!crawlPath) {
- console.error('Algorithms analyzer must be called with a path to crawl results as first parameter');
- process.exit(2);
-}
-main(crawlPath).catch(e => {
- console.error(e);
- process.exit(3);
-});
diff --git a/src/cli/study-backrefs.js b/src/cli/study-backrefs.js
deleted file mode 100644
index ac985ae1..00000000
--- a/src/cli/study-backrefs.js
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/usr/bin/env node
-/**
- * The backrefs analyzer takes paths to an ED crawl folder and a TR crawl folder,
- * and creates a report that lists, for each spec:
- *
- * - Links to anchors that do not exist
- * - Links to anchors that no longer exist in the ED of the target spec
- * - Links to anchors that are not definitions or headings
- * - Links to definitions that are not exported
- * - Links to dated TR URLs
- * - Links to specs that should no longer be referenced
- * - Links to documents that look like specs but are unknown in Reffy
- * (likely not an anomaly per se)
- *
- * It also flags links that look like specs but that do not appear in the crawl
- * (most of these should be false positives).
- *
- * The backrefs analyzer can be called directly through:
- *
- * `node study-backrefs.js [root crawl folder]`
- *
- * where `root crawl folder` is the path to the root folder that contains `ed`
- * and `tr` subfolders. Alternatively, the analyzer may be called with two
- * arguments, one being the path to the ED crawl folder, the other being the path
- * to the TR crawl folder.
- *
- * @module backrefs
- */
-
-import { loadCrawlResults } from '../lib/util.js';
-import studyBackrefs from '../lib/study-backrefs.js';
-import path from 'node:path';
-
-function reportToConsole(results) {
- for (const anomaly of results) {
- anomaly.specs = anomaly.specs.map(spec => {
- return { shortname: spec.shortname, url: spec.url, title: spec.title };
- });
- }
- const perSpec = {};
- for (const anomaly of results) {
- for (const spec of anomaly.specs) {
- if (!perSpec[spec.url]) {
- perSpec[spec.url] = { spec, anomalies: [] };
- }
- perSpec[spec.url].anomalies.push(anomaly);
- }
- }
-
- const anomalyTypes = [
- { name: 'brokenLinks', title: 'Links to anchors that do not exist' },
- { name: 'evolvingLinks', title: 'Links to anchors that no longer exist in the editor\'s draft of the target spec' },
- { name: 'notDfn', title: 'Links to anchors that are not definitions or headings' },
- { name: 'notExported', title: 'Links to definitions that are not exported' },
- { name: 'datedUrls', title: 'Links to dated TR URLs' },
- { name: 'outdatedSpecs', title: 'Links to specs that should no longer be referenced' },
- { name: 'unknownSpecs', title: 'Links to documents that are not recognized as specs' }
- ];
- let report = '';
- Object.keys(perSpec)
- .sort((url1, url2) => perSpec[url1].spec.title.localeCompare(perSpec[url2].spec.title))
- .forEach(url => {
- const spec = perSpec[url].spec;
- const anomalies = perSpec[url].anomalies;
- report += `${spec.title}\n\n`;
- for (const type of anomalyTypes) {
- const links = anomalies
- .filter(anomaly => anomaly.name === type.name)
- .map(anomaly => anomaly.message);
- if (links.length > 0) {
- report += `${type.title}:\n`;
- for (const link of links) {
- report += `* ${link}\n`;
- }
- report += '\n\n';
- }
- }
- report += ' \n';
- });
- console.log(report);
-}
-
-
-/**************************************************
-Main loop
-**************************************************/
-let edCrawlResultsPath = process.argv[2];
-let trCrawlResultsPath = process.argv[3];
-
-if (!edCrawlResultsPath) {
- console.error('Backrefs analyzer must be called with a path to crawl results as first parameter');
- process.exit(2);
-}
-
-// If only one argument is provided, consider that it is the path to the
-// root folder of a crawl results, with "ed" and "tr" subfolders
-if (!trCrawlResultsPath) {
- trCrawlResultsPath = path.join(edCrawlResultsPath, 'tr');
- edCrawlResultsPath = path.join(edCrawlResultsPath, 'ed');
-}
-
-// Target the index file if needed
-if (!edCrawlResultsPath.endsWith('index.json')) {
- edCrawlResultsPath = path.join(edCrawlResultsPath, 'index.json');
-}
-if (!trCrawlResultsPath.endsWith('index.json')) {
- trCrawlResultsPath = path.join(trCrawlResultsPath, 'index.json');
-}
-
-// Analyze the crawl results
-loadCrawlResults(edCrawlResultsPath, trCrawlResultsPath)
- .then(async crawl => {
- // Download the automatic map of multipage anchors in the HTML spec
- let htmlFragments = {};
- try {
- htmlFragments = await fetch("https://html.spec.whatwg.org/multipage/fragment-links.json").then(r => r.json());
- } catch (err) {
- console.warn("Could not fetch HTML fragments data, may report false positive broken links on HTML spec", err);
- }
- return { crawl, htmlFragments };
- })
- .then(({ crawl, htmlFragments }) => studyBackrefs(crawl.ed, crawl.tr, htmlFragments))
- .then(reportToConsole)
- .catch(e => {
- console.error(e);
- process.exit(3);
- });
diff --git a/src/cli/study-webidl.js b/src/cli/study-webidl.js
deleted file mode 100644
index 7d396085..00000000
--- a/src/cli/study-webidl.js
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/env node
-
-import { loadCrawlResults } from '../lib/util.js';
-import studyWebIdl from '../lib/study-webidl.js';
-import loadJSON from '../lib/load-json.js';
-import { expandCrawlResult } from 'reffy';
-import path from 'node:path';
-
-
-function reportToConsole(results) {
- results.forEach(anomaly => anomaly.specs = anomaly.specs.map(spec => {
- return { shortname: spec.shortname, url: spec.url };
- }));
- console.log(JSON.stringify(results, null, 2));
-}
-
-async function main(crawlPath) {
- // Target the index file if needed
- if (!crawlPath.endsWith('index.json')) {
- crawlPath = path.join(crawlPath, 'index.json');
- }
-
- const crawl = await loadJSON(crawlPath);
- if (!crawl) {
- throw new Error("Impossible to read " + crawlPath);
- }
-
- const expanded = await expandCrawlResult(crawl, crawlPath.replace(/index\.json$/, ''), 'idl');
- const report = studyWebIdl(expanded.results);
- reportToConsole(report);
-}
-
-/**************************************************
-Main loop
-**************************************************/
-const crawlPath = process.argv[2];
-if (!crawlPath) {
- console.error('Web IDL analyzer must be called with a path to crawl results as first parameter');
- process.exit(2);
-}
-main(crawlPath).catch(e => {
- console.error(e);
- process.exit(3);
-});
diff --git a/src/lib/generate-report.js b/src/lib/generate-report.js
deleted file mode 100644
index 349a5f6c..00000000
--- a/src/lib/generate-report.js
+++ /dev/null
@@ -1,1020 +0,0 @@
-/**
- * The Markdown report generator takes an anomalies report as input and
- * generates a human-readable report in Markdown out of it. Depending on
- * parameters, the generated report may be a report per spec, a report per
- * issue, a dependencies report, or a diff report.
- *
- * @module markdownGenerator
- */
-
-import loadJSON from './load-json.js';
-
-
-/**
- * Compares specs for ordering by title
- */
-const byTitle = (a, b) => a.title.toUpperCase().localeCompare(b.title.toUpperCase());
-
-/**
- * Returns true when two arrays are equal
- */
-const arrayEquals = (a, b, prop) =>
- (a.length === b.length) &&
- a.every(item => !!(prop ? b.find(i => i[prop] === item[prop]) : b.find(i => i === item)));
-
-/**
- * Options for date formatting
- */
-const dateOptions = {
- day: '2-digit',
- month: 'long',
- year: 'numeric'
-};
-
-const toSlug = name => name.replace(/([A-Z])/g, s => s.toLowerCase())
- .replace(/[^a-z0-9]/g, '_')
- .replace(/_+/g, '_');
-
-/**
- * Helper function that outputs main crawl info about a spec
- *
- * @function
- */
-function writeCrawlInfo(spec, withHeader, w) {
- let wres = '';
- w = w || (msg => wres += (msg || '') + '\n');
-
- if (withHeader) {
- w('#### Spec info {.info}');
- }
- else {
- w('Spec info:');
- }
- w();
-
- let crawledUrl = spec.crawled || spec.latest;
- w('- Initial URL: [' + spec.url + '](' + spec.url + ')');
- w('- Crawled URL: [' + crawledUrl + '](' + crawledUrl + ')');
- if (spec.date) {
- w('- Crawled version: ' + spec.date);
- }
- if (spec.nightly) {
- w('- Editor\'s Draft: [' + spec.nightly.url + '](' + spec.nightly.url + ')');
- }
- if (spec.release) {
- w('- Latest published version: [' + spec.release.url + '](' + spec.release.url + ')');
- }
- if (spec.repository) {
- let githubcom = spec.repository.match(/^https:\/\/github.com\/([^\/]*)\/([^\/]*)/);
- let repositoryName = spec.repository;
- if (githubcom) {
- repositoryName = 'GitHub ' + githubcom[1] + '/' + githubcom[2];
- }
- w('- Repository: [' + repositoryName + '](' + spec.repository + ')');
- }
- w('- Shortname: ' + (spec.shortname || 'no shortname'));
- return wres;
-}
-
-
-function writeDependenciesInfo(spec, results, withHeader, w) {
- let wres = '';
- w = w || (msg => wres += (msg || '') + '\n');
-
- if (withHeader) {
- w('#### Known dependencies on this specification {.dependencies}');
- w();
- }
-
- if (spec.report.referencedBy.normative.length > 0) {
- w('Normative references to this spec from:');
- w();
- spec.report.referencedBy.normative.forEach(s => {
- w('- [' + s.title + '](' + s.crawled + ')');
- });
- }
- else {
- w('No normative reference to this spec from other specs.');
- }
- w();
-
- // Check the list of specifications that should normatively reference
- // this specification because they use IDL content it defines.
- let shouldBeReferencedBy = results.filter(s =>
- s.report.missingWebIdlRef &&
- s.report.missingWebIdlRef.find(i =>
- i.refs.find(ref => (ref.url === spec.url))));
- if (shouldBeReferencedBy.length > 0) {
- w('Although they do not, the following specs should also normatively' +
- ' reference this spec because they use IDL terms it defines:');
- w();
- shouldBeReferencedBy.forEach(s => {
- w('- [' + s.title + '](' + s.crawled + ')');
- });
- w();
- }
-
- if (spec.report.referencedBy.informative.length > 0) {
- w('Informative references to this spec from:');
- w();
- spec.report.referencedBy.informative.forEach(s => {
- w('- [' + s.title + '](' + s.crawled + ')');
- });
- }
- else {
- w('No informative reference to this spec from other specs.');
- }
- return wres;
-}
-
-/**
- * Outputs a human-readable Markdown anomaly report from a crawl report,
- * with one entry per spec.
- *
- * The function returns the report as a Markdown string.
- *
- * @function
- */
-function generateReportPerSpec(study) {
- var count = 0;
- let wres = '';
- const w = msg => wres += (msg || '') + '\n';
- const results = study.results;
-
- w('% ' + (study.title || 'Web specs analysis'));
- w('% Strudy');
- w('% ' + (new Date(study.date)).toLocaleDateString('en-US', dateOptions));
- w();
-
- const specReport = spec => {
- // Prepare anomaly flags
- let flags = ['spec'];
- if (spec.report.error) {
- flags.push('error');
- }
- else {
- if (!spec.report.ok) {
- flags.push('anomaly');
- }
- flags = flags.concat(Object.keys(spec.report)
- .filter(anomaly => (anomaly !== 'referencedBy'))
- .filter(anomaly => (Array.isArray(spec.report[anomaly]) ?
- (spec.report[anomaly].length > 0) :
- !!spec.report[anomaly])));
- }
- let attr = flags.reduce((res, anomaly) =>
- res + (res ? ' ' : '') + 'data-' + anomaly + '=true', '');
-
- w('### ' + spec.title + ' {' + attr + '}');
- w();
- writeCrawlInfo(spec, true, w);
- w();
-
- const report = spec.report;
- w('#### Potential issue(s) {.anomalies}');
- w();
- if (report.ok) {
- w('This specification looks good!');
- }
- else if (report.error) {
- w('The following network or parsing error occurred:');
- w('`' + report.error + '`');
- w();
- w('Reffy could not render this specification as a DOM tree and' +
- ' cannot say anything about it as a result. In particular,' +
- ' it cannot include content defined in this specification' +
- ' in the analysis of other specifications crawled in this' +
- ' report.');
- }
- else {
- if (report.noNormativeRefs) {
- w('- No normative references found');
- }
- if (report.hasInvalidIdl) {
- w('- Invalid WebIDL content found');
- }
- if (report.hasObsoleteIdl) {
- w('- Obsolete WebIDL constructs found');
- }
- if (report.noRefToWebIDL) {
- w('- Spec uses WebIDL but does not reference it normatively');
- }
- if (report.unknownExposedNames &&
- (report.unknownExposedNames.length > 0)) {
- w('- Unknown [Exposed] names used: ' +
- report.unknownExposedNames.map(name => '`' + name + '`').join(', '));
- }
- if (report.unknownIdlNames &&
- (report.unknownIdlNames.length > 0)) {
- w('- Unknown WebIDL names used: ' +
- report.unknownIdlNames.map(name => '`' + name + '`').join(', '));
- }
- if (report.redefinedIdlNames &&
- (report.redefinedIdlNames.length > 0)) {
- w('- WebIDL names also defined elsewhere: ');
- report.redefinedIdlNames.map(i => {
- w(' * `' + i.name + '` also defined in ' +
- i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' and '));
- });
- }
- if (report.missingWebIdlRef &&
- (report.missingWebIdlRef.length > 0)) {
- w('- Missing references for WebIDL names: ');
- report.missingWebIdlRef.map(i => {
- w(' * `' + i.name + '` defined in ' +
- i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' or '));
- });
- }
- [
- {prop: 'css', warning: false, title: 'No definition for CSS constructs'},
- {prop: 'idl', warning: false, title: 'No definition for IDL constructs'},
- {prop: 'css', warning: true, title: 'Possibly no definition for CSS constructs'},
- {prop: 'idl', warning: true, title: 'Possibly no definition for IDL constructs'}
- ].forEach(type => {
- if (report.missingDfns && report.missingDfns[type.prop] &&
- (report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).length > 0)) {
- w('- ' + type.title + ': ');
- report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).map(missing => {
- const exp = missing.expected;
- const found = missing.found;
- const foundFor = (found && found.for && found.for.length > 0) ?
- ' for ' + found.for.map(f => '`' + f + '`').join(',') :
- '';
- w(' * `' + exp.linkingText[0] + '`' +
- (exp.type ? ' with type `' + exp.type + '`' : '') +
- (missing.for ? ' for [`' + missing.for.linkingText[0] + '`](' + missing.for.href + ')' : '') +
- (found ? ', but found [`' + found.linkingText[0] + '`](' + found.href + ') with type `' + found.type + '`' + foundFor : ''));
- });
- }
- });
- if (report.missingLinkRef &&
- (report.missingLinkRef.length > 0)) {
- w('- Missing references for links: ');
- report.missingLinkRef.map(l => {
- w(' * [`' + l + '`](' + l + ')');
- });
- }
- if (report.inconsistentRef &&
- (report.inconsistentRef.length > 0)) {
- w('- Inconsistent references for links: ');
- report.inconsistentRef.map(l => {
- w(' * [`' + l.link + '`](' + l.link + '), related reference "' + l.ref.name + '" uses URL [`' + l.ref.url + '`](' + l.ref.url + ')');
- });
- }
- if (report.xrefs) {
- [
- { prop: 'notExported', title: 'External links to private terms' },
- { prop: 'notDfn', title: 'External links that neither target definitions nor headings' },
- { prop: 'brokenLinks', title: 'Broken external links' },
- { prop: 'evolvingLinks', title: 'External links to terms that no longer exist in the latest version of the targeted specification' },
- { prop: 'outdatedSpecs', title: 'External links to outdated specs' },
- { prop: 'datedUrls', title: 'External links that use a dated URL' }
- ].forEach(type => {
- if (report.xrefs[type.prop] && (report.xrefs[type.prop].length > 0)) {
- w('- ' + type.title + ':');
- report.xrefs[type.prop].map(l => {
- w(' * [`' + l + '`](' + l + ')');
- })
- }
- });
- }
- }
- w();
- writeDependenciesInfo(spec, results, true, w);
- w();
- w();
- };
-
-
- const orgs = [...new Set(study.results.map(r => r.organization))].sort();
- for (let org of orgs) {
- w(`# ${org} {#org-${toSlug(org)}}`);
- w();
- const groups = [...new Set(study.results.filter(r => r.organization === org).map(r => r.groups.map(g => g.name)).flat())].sort();
- for (let group of groups) {
- w(`## ${group} {#group-${toSlug(group)}}`);
- w();
- study.results
- .filter(r => r.organization === org && r.groups.find(g => g.name === group))
- .forEach(specReport);
- }
- }
-
- w();
- w();
-
- return wres;
-}
-
-
-/**
- * Outputs a human-readable Markdown anomaly report from a crawl report,
- * sorted by type of anomaly.
- *
- * The function returns the report as a Markdown string.
- *
- * @function
- */
-function generateReportPerIssue(study) {
- let wres = '';
- const w = msg => wres += (msg || '') + '\n';
-
- let count = 0;
- let results = study.results;
-
- w('% ' + (study.title || 'Web specs analysis'));
- w('% Strudy');
- w('% ' + (new Date(study.date)).toLocaleDateString('en-US', dateOptions));
- w();
-
- count = results.length;
- w('' + count + ' specification' + ((count > 1) ? 's' : '') + ' were crawled in this report.');
- w();
- w();
-
- let parsingErrors = results.filter(spec => spec.report.error);
- if (parsingErrors.length > 0) {
- w('## Specifications that could not be rendered');
- w();
-    w('Reffy could not fetch or render these specifications for some reason.' +
-      ' This may happen when a network error occurs or when a specification' +
-      ' uses an old version of ReSpec.');
- w();
- count = 0;
- parsingErrors.forEach(spec => {
- count += 1;
- w('- [' + spec.title + '](' + spec.crawled + '): `' + spec.report.error + '`');
- });
- w();
- w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found');
- w();
- w();
-
- // Remove specs that could not be parsed from the rest of the report
- results = results.filter(spec => !spec.report.error);
- }
-
-
- count = 0;
- w('## Specifications without normative dependencies');
- w();
- results
- .filter(spec => spec.report.noNormativeRefs)
- .forEach(spec => {
- count += 1;
- w('- [' + spec.title + '](' + spec.crawled + ')');
- });
- w();
- w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found');
- if (count > 0) {
- w();
-    w('Nearly all specifications have normative dependencies on some other' +
-      ' specification. Reffy could not find any normative dependencies for the' +
-      ' specifications mentioned above, which seems strange.');
- }
- w();
- w();
-
- count = 0;
- w('## List of specifications with invalid WebIDL content');
- w();
- results
- .filter(spec => spec.report.hasInvalidIdl)
- .forEach(spec => {
- count += 1;
- w('- [' + spec.title + '](' + spec.crawled + ')');
- });
- w();
- w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found');
- if (count > 0) {
- w();
-    w('WebIDL continues to evolve. Strudy may incorrectly report perfectly' +
-      ' valid WebIDL content as invalid if the specification uses' +
-      ' bleeding-edge WebIDL features.');
- }
- w();
- w();
-
- count = 0;
- w('## List of specifications with obsolete WebIDL constructs');
- w();
- results
- .filter(spec => spec.report.hasObsoleteIdl)
- .forEach(spec => {
- count += 1;
- w('- [' + spec.title + '](' + spec.crawled + ')');
- });
- w();
- w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found');
- if (count > 0) {
- w();
- w('A typical example is the use of `[]` instead of `FrozenArray`.');
- }
- w();
- w();
-
- count = 0;
- w('## Specifications that use WebIDL but do not reference the WebIDL spec');
- w();
- results.forEach(spec => {
- if (spec.report.noRefToWebIDL) {
- count += 1;
- w('- [' + spec.title + '](' + spec.crawled + ')');
- }
- });
- w();
- w('=> ' + count + ' specification' + ((count > 1) ? 's' : '') + ' found');
- if (count > 0) {
- w();
-    w('All specifications that define WebIDL content should have a' +
-      ' **normative** reference to the WebIDL specification.' +
-      ' Some specifications listed here may reference the WebIDL' +
-      ' specification informatively, but that is not enough!');
- }
- w();
- w();
-
-
- count = 0;
- w('## List of [Exposed] names not defined in the specifications crawled');
- w();
- var idlNames = {};
- results.forEach(spec => {
- if (!spec.report.unknownExposedNames ||
- (spec.report.unknownExposedNames.length === 0)) {
- return;
- }
- spec.report.unknownExposedNames.forEach(name => {
- if (!idlNames[name]) {
- idlNames[name] = [];
- }
- idlNames[name].push(spec);
- });
- });
- Object.keys(idlNames).sort().forEach(name => {
- count += 1;
- w('- `' + name + '` used in ' +
- idlNames[name].map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(', '));
- });
- w();
- w('=> ' + count + ' [Exposed] name' + ((count > 1) ? 's' : '') + ' found');
- if (count > 0) {
- w();
- w('Please keep in mind that Strudy only knows about IDL terms defined in the' +
- ' specifications that were crawled **and** that do not have invalid IDL content.');
- }
- w();
- w();
-
-
- count = 0;
- w('## List of WebIDL names not defined in the specifications crawled');
- w();
- idlNames = {};
- results.forEach(spec => {
- if (!spec.report.unknownIdlNames ||
- (spec.report.unknownIdlNames.length === 0)) {
- return;
- }
- spec.report.unknownIdlNames.forEach(name => {
- if (!idlNames[name]) {
- idlNames[name] = [];
- }
- idlNames[name].push(spec);
- });
- });
- Object.keys(idlNames).sort().forEach(name => {
- count += 1;
- w('- `' + name + '` used in ' +
- idlNames[name].map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(', '));
- });
- w();
- w('=> ' + count + ' WebIDL name' + ((count > 1) ? 's' : '') + ' found');
- if (count > 0) {
- w();
-    w('Some of them may be type errors in specs (e.g. "int" does not exist, "Array" cannot be used on its own, etc.).');
- w('Also, please keep in mind that Strudy only knows about IDL terms defined in the' +
- ' specifications that were crawled **and** that do not have invalid IDL content.');
- }
- w();
- w();
-
- count = 0;
- w('## List of WebIDL names defined in more than one spec');
- w();
- idlNames = {};
- results.forEach(spec => {
- if (!spec.report.redefinedIdlNames ||
- (spec.report.redefinedIdlNames.length === 0)) {
- return;
- }
- spec.report.redefinedIdlNames.forEach(i => {
- if (!idlNames[i.name]) {
- idlNames[i.name] = [];
- }
- idlNames[i.name].push(spec);
- });
- });
- Object.keys(idlNames).sort().forEach(name => {
- count += 1;
- w('- `' + name + '` defined in ' +
- idlNames[name].map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' and '));
- });
- w();
- w('=> ' + count + ' WebIDL name' + ((count > 1) ? 's' : '') + ' found');
- if (count > 0) {
- w();
- w('"There can be only one"...');
- }
- w();
- w();
-
- count = 0;
- var countrefs = 0;
- w('## Missing references for WebIDL names');
- w();
- results.forEach(spec => {
- if (spec.report.missingWebIdlRef &&
- (spec.report.missingWebIdlRef.length > 0)) {
- count += 1;
- if (spec.report.missingWebIdlRef.length === 1) {
- countrefs += 1;
- let i = spec.report.missingWebIdlRef[0];
- w('- [' + spec.title + '](' + spec.crawled + ')' +
- ' uses `' + i.name + '` but does not reference ' +
- i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' or '));
- }
- else {
- w('- [' + spec.title + '](' + spec.crawled + ') uses:');
- spec.report.missingWebIdlRef.map(i => {
- countrefs += 1;
- w(' * `' + i.name + '` but does not reference ' +
- i.refs.map(ref => ('[' + ref.title + '](' + ref.crawled + ')')).join(' or '));
- });
- }
- }
- });
- w();
- w('=> ' + countrefs + ' missing reference' + ((countrefs > 1) ? 's' : '') +
- ' for IDL definitions found in ' + count + ' specification' +
- ((count > 1) ? 's' : ''));
- w();
- w();
-
- [
- {prop: 'css', warning: false, title: 'No definition for CSS constructs'},
- {prop: 'idl', warning: false, title: 'No definition for IDL constructs'},
- {prop: 'css', warning: true, title: 'Possibly no definition for CSS constructs'},
- {prop: 'idl', warning: true, title: 'Possibly no definition for IDL constructs'}
- ].forEach(type => {
- count = 0;
- countrefs = 0;
- w('## ' + type.title);
- w();
-
- results.forEach(spec => {
- if (spec.report.missingDfns &&
- spec.report.missingDfns[type.prop] &&
- (spec.report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).length > 0)) {
- count += 1;
-
- w('- [' + spec.title + '](' + spec.crawled + '):');
- spec.report.missingDfns[type.prop].filter(r => !!r.warning === type.warning).map(missing => {
- countrefs += 1;
- const exp = missing.expected;
- const found = missing.found;
- const foundFor = (found && found.for && found.for.length > 0) ?
- ' for ' + found.for.map(f => '`' + f + '`').join(',') :
- '';
- w(' * `' + exp.linkingText[0] + '`' +
- (exp.type ? ' with type `' + exp.type + '`' : '') +
- (missing.for ? ' for [`' + missing.for.linkingText[0] + '`](' + missing.for.href + ')' : '') +
- (found ? ', but found [`' + found.linkingText[0] + '`](' + found.href + ') with type `' + found.type + '`' + foundFor : ''));
- });
- }
- });
-
- w();
- w('=> ' + countrefs + ' construct' + ((countrefs > 1) ? 's' : '') +
- ' without definition found in ' + count + ' specification' +
- ((count > 1) ? 's' : ''));
- w();
- w();
- });
-
-
- count = 0;
- countrefs = 0;
- w('## Missing references based on document links');
- w();
- results.forEach(spec => {
- if (spec.report.missingLinkRef &&
- (spec.report.missingLinkRef.length > 0)) {
- count += 1;
- if (spec.report.missingLinkRef.length === 1) {
- countrefs += 1;
- let l = spec.report.missingLinkRef[0];
- w('- [' + spec.title + '](' + spec.crawled + ')' +
- ' links to [`' + l + '`](' + l + ') but does not list it' +
- ' in its references');
- }
- else {
- w('- [' + spec.title + '](' + spec.crawled + ') links to:');
- spec.report.missingLinkRef.forEach(l => {
- countrefs++;
- w(' * [`' + l + '`](' + l + ') but does not list it ' +
- 'in its references');
- });
- }
- }
- });
- w();
- w('=> ' + countrefs + ' missing reference' + ((countrefs > 1) ? 's' : '') +
- ' for links found in ' + count + ' specification' +
- ((count > 1) ? 's' : ''));
- if (count > 0) {
- w();
- w('Any link to an external document from within a specification should' +
- ' trigger the creation of a corresponding entry in the references' +
- ' section.');
- w();
-    w('Note that Strudy only reports on links to "well-known" specs and' +
-      ' ignores links to unusual documents (e.g. PDF documents) for now.');
- }
- w();
- w();
-
- count = 0;
- countrefs = 0;
- w('## Reference URL is inconsistent with URL used in document links');
- w();
- results.forEach(spec => {
- if (spec.report.inconsistentRef &&
- (spec.report.inconsistentRef.length > 0)) {
- count += 1;
- if (spec.report.inconsistentRef.length === 1) {
- countrefs += 1;
- let l = spec.report.inconsistentRef[0];
- w('- [' + spec.title + '](' + spec.crawled + ')' +
- ' links to [`' + l.link + '`](' + l.link + ') but related reference "' + l.ref.name + '" uses URL [`' + l.ref.url + '`](' + l.ref.url + ')');
- }
- else {
- w('- [' + spec.title + '](' + spec.crawled + ') links to:');
- spec.report.inconsistentRef.forEach(l => {
- countrefs++;
- w(' * [`' + l.link + '`](' + l.link + ') but related reference "' + l.ref.name + '" uses URL [`' + l.ref.url + '`](' + l.ref.url + ')');
- });
- }
- }
- });
- w();
- w('=> ' + countrefs + ' inconsistent reference' + ((countrefs > 1) ? 's' : '') +
- ' for links found in ' + count + ' specification' +
- ((count > 1) ? 's' : ''));
- if (count > 0) {
- w();
-    w('Links in the body of a specification should point to the same document' +
-      ' as the related reference in the References section.' +
-      ' The specifications reported here use a different URL. For instance,' +
-      ' they may link to the Editor\'s Draft but target the latest' +
-      ' published version in the References section.' +
-      ' There should be some consistency across the specification.');
- }
- w();
- w();
-
- [
- { prop: 'notExported', title: 'External links to private terms' },
- { prop: 'notDfn', title: 'External links that neither target definitions nor headings' },
- { prop: 'brokenLinks', title: 'Broken external links' },
- { prop: 'evolvingLinks', title: 'External links to terms that no longer exist in the latest version of the targeted specification' },
- { prop: 'outdatedSpecs', title: 'External links to outdated specs' },
- { prop: 'datedUrls', title: 'External links that use a dated URL' }
- ].forEach(type => {
- count = 0;
- countrefs = 0;
- w('## ' + type.title);
- w();
-
- results.forEach(spec => {
- if (spec.report.xrefs &&
- spec.report.xrefs[type.prop] &&
- (spec.report.xrefs[type.prop].length > 0)) {
- count += 1;
-
- w('- [' + spec.title + '](' + spec.crawled + '):');
- spec.report.xrefs[type.prop].map(l => {
- countrefs += 1;
- w(' * [`' + l + '`](' + l + ')');
- });
- }
- });
-
- w();
- w('=> ' + countrefs + ' problematic external link' + ((countrefs > 1) ? 's' : '') +
- ' found in ' + count + ' specification' +
- ((count > 1) ? 's' : ''));
- w();
- w();
- });
-
-
- return wres;
-}
-
-
-/**
- * Outputs a human-readable Markdown dependencies report from a crawl report,
- * one entry per spec.
- *
- * The function returns the generated report as a string.
- *
- * @function
- */
-function generateDependenciesReport(study) {
- let wres = '';
- const w = msg => wres += (msg || '') + '\n';
-
- let count = 0;
- const results = study.results;
-
- w('# Web specs dependencies report');
- w();
- w('Strudy is an analysis tool for Web spec crawl reports created by Reffy.' +
- ' It studies extracts created during the crawl.');
- w();
- w('The report below lists incoming links for each specification, in other words the list' +
- ' of specifications that normatively or informatively reference a given specification.');
- w();
- w('By definition, Strudy only knows about incoming links from specifications that have been' +
- ' crawled and that could successfully be parsed. Other specifications that Strudy does' +
- ' not know anything about may reference specifications listed here.');
- w();
- results.forEach(spec => {
- w('## ' + spec.title);
- w();
- writeCrawlInfo(spec, false, w);
- w();
- writeDependenciesInfo(spec, results, false, w);
- w();
- w();
- });
-
- return wres;
-}
-
-
-/**
- * Outputs a human-readable diff between two crawl reports, one entry per spec.
- *
- * The function returns the generated report as a string.
- *
- * @function
- */
-function generateDiffReport(study, refStudy, options) {
- options = options || {};
- let wres = '';
- const w = msg => wres += (msg || '') + '\n';
-
- const results = study.results;
- const resultsRef = refStudy.results;
-
- // Compute diff for all specs
-  // (note we're only interested in specs that are part of the new crawl,
-  // and won't report on specs that were there before and got dropped)
- let resultsDiff = results.map(spec => {
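-    // Specs absent from the reference crawl get an empty report, so all of
-    // their anomalies show up as insertions in the diff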
- let ref = resultsRef.find(s => s.url === spec.url) || {
- missing: true,
- report: {
- unknownExposedNames: [],
- unknownIdlNames: [],
- redefinedIdlNames: [],
- missingWebIdlRef: [],
- missingLinkRef: [],
- inconsistentRef: []
- }
- };
-
- const report = spec.report;
- const reportRef = ref.report;
-
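-    // Diff helpers: a null result means there is nothing to report;
-    // otherwise "ins" lists what the new report adds and "del" what it
-    // drops compared to the reference report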
- const getSimpleDiff = prop => (report[prop] !== reportRef[prop]) ?
- {
- ins: (typeof report[prop] !== 'undefined') ? report[prop] : null,
- del: (typeof reportRef[prop] !== 'undefined') ? reportRef[prop] : null
- } :
- null;
- const getArrayDiff = (prop, key) =>
- (!arrayEquals(report[prop], reportRef[prop], key) &&
- (!options.onlyNew || report[prop].find(item => !reportRef[prop].find(i => (key ? i[key] === item[key] : i === item))))) ?
- {
- ins: report[prop].filter(item => !reportRef[prop].find(i => (key ? i[key] === item[key] : i === item))),
- del: reportRef[prop].filter(item => !report[prop].find(i => (key ? i[key] === item[key] : i === item)))
- } :
- null;
-
- // Compute diff between new and ref report for that spec
- const diff = {
- title: (spec.title !== ref.title) ? {
- ins: (typeof spec.title !== 'undefined') ? spec.title : null,
- del: (typeof ref.title !== 'undefined') ? ref.title : null
- } : null,
- ok: getSimpleDiff('ok'),
- error: getSimpleDiff('error'),
- noNormativeRefs: getSimpleDiff('noNormativeRefs'),
- noRefToWebIDL: getSimpleDiff('noRefToWebIDL'),
- hasInvalidIdl: getSimpleDiff('hasInvalidIdl'),
- hasObsoleteIdl: getSimpleDiff('hasObsoleteIdl'),
- unknownExposedNames: getArrayDiff('unknownExposedNames'),
- unknownIdlNames: getArrayDiff('unknownIdlNames'),
- redefinedIdlNames: getArrayDiff('redefinedIdlNames', 'name'),
- missingWebIdlRef: getArrayDiff('missingWebIdlRef', 'name'),
- missingLinkRef: getArrayDiff('missingLinkRef'),
- inconsistentRef: getArrayDiff('inconsistentRef', 'link')
- };
-
- return {
- title: spec.title,
- shortname: spec.shortname,
- date: spec.date,
- url: spec.url,
- release: spec.release,
- nightly: spec.nightly,
- repository: spec.repository,
- isNewSpec: ref.missing,
- hasDiff: Object.keys(diff).some(key => diff[key] !== null),
- diff
- };
- });
-
- if (!options.onlyNew) {
- resultsDiff = resultsDiff.concat(resultsRef
- .map(spec => {
- let ref = results.find(s => s.url === spec.url);
- if (ref) return null;
- return {
- title: spec.title,
- shortname: spec.shortname,
- date: spec.date,
- url: spec.url,
- release: spec.release,
- nightly: spec.nightly,
- crawled: spec.crawled,
- repository: spec.repository,
- isUnknownSpec: true,
- hasDiff: true
- };
- })
- .filter(spec => !!spec));
- resultsDiff.sort(byTitle);
- }
-
- w('% Diff between report from "' +
- (new Date(study.date)).toLocaleDateString('en-US', dateOptions) +
- '" and reference report from "' +
- (new Date(refStudy.date)).toLocaleDateString('en-US', dateOptions) +
- '"');
- w('% Strudy');
- w('% ' + (new Date(study.date)).toLocaleDateString('en-US', dateOptions));
- w();
-
- resultsDiff.forEach(spec => {
- // Nothing to report if crawl result is the same
- if (!spec.hasDiff) {
- return;
- }
-
- w('## ' + spec.title);
- w();
-
- let crawledUrl = spec.crawled || spec.latest;
- w('- Initial URL: [' + spec.url + '](' + spec.url + ')');
- w('- Crawled URL: [' + crawledUrl + '](' + crawledUrl + ')');
- if (spec.nightly && (spec.nightly.url !== crawledUrl)) {
- w('- Editor\'s Draft: [' + spec.nightly.url + '](' + spec.nightly.url + ')');
- }
- if (spec.repository) {
- let githubcom = spec.repository.match(/^https:\/\/github.com\/([^\/]*)\/([^\/]*)/);
- let repositoryName = spec.repository;
- if (githubcom) {
- repositoryName = 'GitHub ' + githubcom[1] + '/' + githubcom[2];
- }
- w('- Repository: [' + repositoryName + '](' + spec.repository + ')');
- }
-
- if (spec.isNewSpec) {
- w('- This specification was not in the reference crawl report.');
- w();
- w();
- return;
- }
-
- if (spec.isUnknownSpec) {
- w('- This specification is not in the new crawl report.');
- w();
- w();
- return;
- }
-
- const diff = spec.diff;
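-    // Format a property diff as Markdown: "*INS* <new value> / *DEL* <old value>"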
- const simpleDiff = prop =>
- ((diff[prop].ins !== null) ? '*INS* ' + diff[prop].ins : '') +
- (((diff[prop].ins !== null) && (diff[prop].del !== null)) ? ' / ' : '') +
- ((diff[prop].del !== null) ? '*DEL* ' + diff[prop].del : '');
- const arrayDiff = (prop, key) =>
- ((diff[prop].ins.length > 0) ? '*INS* ' + diff[prop].ins.map(i => (key ? i[key] : i)).join(', ') : '') +
- (((diff[prop].ins.length > 0) && (diff[prop].del.length > 0)) ? ' / ' : '') +
- ((diff[prop].del.length > 0) ? '*DEL* ' + diff[prop].del.map(i => (key ? i[key] : i)).join(', ') : '');
-
- [
- { title: 'Spec title', prop: 'title', diff: 'simple' },
- { title: 'Spec is OK', prop: 'ok', diff: 'simple' },
- { title: 'Spec could not be rendered', prop: 'error', diff: 'simple' },
- { title: 'No normative references found', prop: 'noNormativeRefs', diff: 'simple' },
- { title: 'Invalid WebIDL content found', prop: 'hasInvalidIdl', diff: 'simple' },
- { title: 'Obsolete WebIDL constructs found', prop: 'hasObsoleteIdl', diff: 'simple' },
- { title: 'Spec does not reference WebIDL normatively', prop: 'noRefToWebIDL', diff: 'simple' },
- { title: 'Unknown [Exposed] names used', prop: 'unknownExposedNames', diff: 'array' },
- { title: 'Unknown WebIDL names used', prop: 'unknownIdlNames', diff: 'array' },
- { title: 'WebIDL names also defined elsewhere', prop: 'redefinedIdlNames', diff: 'array', key: 'name' },
- { title: 'Missing references for WebIDL names', prop: 'missingWebIdlRef', diff: 'array', key: 'name' },
- { title: 'Missing references for links', prop: 'missingLinkRef', diff: 'array' },
- { title: 'Inconsistent references for links', prop: 'inconsistentRef', diff: 'array', key: 'link' }
- ].forEach(item => {
- // Only report actual changes, and don't report other changes when
- // the spec could not be rendered in one of the crawl reports
- if (diff[item.prop] && ((item.prop === 'error') || (item.prop === 'title') || (item.prop === 'latest') || !diff.error)) {
- w('- ' + item.title + ': ' + ((item.diff === 'simple') ?
- simpleDiff(item.prop) :
- arrayDiff(item.prop, item.key)));
- }
- });
- w();
- w();
- });
-
- return wres;
-}
-
-
-/**
- * Main function that generates a Markdown report from a study file.
- *
- * @function
- * @param {String} studyFile Path to the study file to parse, or study report
- * @param {Object} options Type of report to generate and other options
- * @return {String} The generated report
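- * @example
- * // Hypothetical file name, for illustration:
- * const markdown = await generateReport('reports/study.json', { perSpec: true });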
- */
-async function generateReport(studyFile, options) {
- options = options || {};
- if (!studyFile) {
- throw new Error('Required filename parameter missing');
- }
- if (options.diffReport && !options.refStudyFile) {
- throw new Error('Required filename to reference crawl for diff missing');
- }
-
- const study = typeof studyFile === 'string' ?
- (await loadJSON(studyFile)) :
- studyFile;
- if (!study) {
- throw new Error('Impossible to read ' + studyFile);
- }
-
- let refStudy = {};
- if (options.diffReport) {
- if (options.refStudyFile.startsWith('http')) {
- try {
- let response = await fetch(options.refStudyFile, { nolog: true });
- refStudy = await response.json();
- }
- catch (e) {
- throw new Error('Impossible to fetch ' + options.refStudyFile + ': ' + e);
- }
- return generateDiffReport(study, refStudy, { onlyNew: options.onlyNew });
- }
- else {
- refStudy = await loadJSON(options.refStudyFile);
- if (!refStudy) {
- throw new Error('Impossible to read ' + options.refStudyFile);
- }
- return generateDiffReport(study, refStudy, { onlyNew: options.onlyNew });
- }
- }
- else if (options.depReport) {
- return generateDependenciesReport(study);
- }
- else if (options.perSpec) {
- return generateReportPerSpec(study);
- }
- else {
- return generateReportPerIssue(study);
- }
-}
-
-
-/**************************************************
-Export methods for use as module
-**************************************************/
-export default generateReport;
diff --git a/src/lib/study-algorithms.js b/src/lib/study-algorithms.js
index 76f76fd9..2466a282 100644
--- a/src/lib/study-algorithms.js
+++ b/src/lib/study-algorithms.js
@@ -1,11 +1,4 @@
import { JSDOM } from 'jsdom';
-import { recordCategorizedAnomaly } from './util.js';
-
-const possibleAnomalies = [
- 'missingTaskForPromise',
- 'missingTaskForEvent'
-];
-
/**
* Normalize whitespaces in string to make analysis easier
@@ -57,9 +50,8 @@ function nestParallelSteps(algo) {
/**
* Main function, study all algorithms
*/
-function studyAlgorithms(edResults) {
+function studyAlgorithms(specs) {
const report = [];
- const recordAnomaly = recordCategorizedAnomaly(report, 'algorithms', possibleAnomalies);
// Return human-friendly markdown that identifies the given algorithm
function getAlgoName(algo) {
@@ -95,11 +87,19 @@ function studyAlgorithms(edResults) {
// https://w3c.github.io/clipboard-apis/#dom-clipboard-read
!html.includes('systemClipboardRepresentation')
) {
- recordAnomaly(spec, 'missingTaskForPromise', `${getAlgoName(algo)} has a parallel step that resolves/rejects a promise directly`);
+ report.push({
+ name: 'missingTaskForPromise',
+ message: `${getAlgoName(algo)} has a parallel step that resolves/rejects a promise directly`,
+ spec
+ });
return true;
}
else if (html.match(/fire an?( \w+)? event/i)) {
- recordAnomaly(spec, 'missingTaskForEvent', `${getAlgoName(algo)} has a parallel step that fires an event directly`);
+ report.push({
+ name: 'missingTaskForEvent',
+ message: `${getAlgoName(algo)} has a parallel step that fires an event directly`,
+ spec
+ });
return true;
}
}
@@ -133,13 +133,10 @@ function studyAlgorithms(edResults) {
return anomalyFound;
}
- // We're only interested in specs that define algorithms
- const specs = edResults.filter(spec => !!spec.algorithms);
-
// Study algorithms in turn.
// Note: the root level of each algorithm is its first step. It may say
// something like "run these steps in parallel" in particular.
- for (const spec of specs) {
+ for (const spec of specs.filter(spec => !!spec.algorithms)) {
for (const algo of spec.algorithms) {
nestParallelSteps(algo);
studyAlgorithmStep(spec, algo, algo);
diff --git a/src/lib/study-backrefs.js b/src/lib/study-backrefs.js
index 2be544fa..0d8612d8 100644
--- a/src/lib/study-backrefs.js
+++ b/src/lib/study-backrefs.js
@@ -1,18 +1,3 @@
-import { loadCrawlResults, recordCategorizedAnomaly } from './util.js';
-import { fileURLToPath } from 'node:url';
-
-const possibleAnomalies = [
- 'brokenLinks',
- 'datedUrls',
- 'evolvingLinks',
- 'frailLinks',
- 'nonCanonicalRefs',
- 'notDfn',
- 'notExported',
- 'outdatedSpecs',
- 'unknownSpecs'
-];
-
/**
* The backrefs analyzer only checks links to other specs. This function returns
* true when a link does target a spec, and false if it targets something else
@@ -39,57 +24,91 @@ const matchSpecUrl = url =>
TODO: DRY
Copied from browser-specs/src/compute-shortname.js
*/
-function computeShortname (url) {
- function parseUrl (url) {
+function computeShortname(url) {
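+  // e.g. "https://www.w3.org/TR/css-grid-2/" yields "css-grid-2"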
+ function parseUrl(url) {
// Handle /TR/ URLs
- const w3cTr = url.match(/^https?:\/\/(?:www\.)?w3\.org\/TR\/([^/]+)\/$/);
+ const w3cTr = url.match(/^https?:\/\/(?:www\.)?w3\.org\/TR\/([^\/]+)\/$/);
if (w3cTr) {
return w3cTr[1];
}
// Handle WHATWG specs
- const whatwg = url.match(/\/\/(.+)\.spec\.whatwg\.org\/?/);
+ const whatwg = url.match(/\/\/(.+)\.spec\.whatwg\.org\//);
if (whatwg) {
- return whatwg[1];
+ return whatwg[1];
}
// Handle TC39 Proposals
- const tc39 = url.match(/\/\/tc39\.es\/proposal-([^/]+)\/$/);
+ const tc39 = url.match(/\/\/tc39\.es\/proposal-([^\/]+)\/$/);
if (tc39) {
- return 'tc39-' + tc39[1];
+ return "tc39-" + tc39[1];
}
+
// Handle Khronos extensions
- const khronos = url.match(/https:\/\/registry\.khronos\.org\/webgl\/extensions\/([^/]+)\/$/);
+ const khronos = url.match(/https:\/\/registry\.khronos\.org\/webgl\/extensions\/([^\/]+)\/$/);
if (khronos) {
- return khronos[1];
+ return khronos[1];
}
// Handle extension specs defined in the same repo as the main spec
// (e.g. generate a "gamepad-extensions" name for
// https://w3c.github.io/gamepad/extensions.html")
- const ext = url.match(/\/.*\.github\.io\/([^/]+)\/(extensions?)\.html$/);
+ const ext = url.match(/\/.*\.github\.io\/([^\/]+)\/(extensions?)\.html$/);
if (ext) {
return ext[1] + '-' + ext[2];
}
// Handle draft specs on GitHub, excluding the "webappsec-" prefix for
// specifications developed by the Web Application Security Working Group
- const github = url.match(/\/.*\.github\.io\/(?:webappsec-)?([^/]+)\//);
+ const github = url.match(/\/.*\.github\.io\/(?:webappsec-)?([^\/]+)\//);
if (github) {
- return github[1];
+ return github[1];
}
// Handle CSS WG specs
- const css = url.match(/\/drafts\.(?:csswg|fxtf|css-houdini)\.org\/([^/]+)\//);
+ const css = url.match(/\/drafts\.(?:csswg|fxtf|css-houdini)\.org\/([^\/]+)\//);
if (css) {
return css[1];
}
// Handle SVG drafts
- const svg = url.match(/\/svgwg\.org\/specs\/(?:svg-)?([^/]+)\//);
+ const svg = url.match(/\/svgwg\.org\/specs\/(?:svg-)?([^\/]+)\//);
if (svg) {
- return 'svg-' + svg[1];
+ return "svg-" + svg[1];
+ }
+
+ // Handle IETF RFCs
+ const rfcs = url.match(/\/www.rfc-editor\.org\/rfc\/(rfc[0-9]+)/);
+ if (rfcs) {
+ return rfcs[1];
+ }
+
+ // Handle IETF group drafts
+ const ietfDraft = url.match(/\/datatracker\.ietf\.org\/doc\/html\/draft-ietf-[^\-]+-([^\/]+)/);
+ if (ietfDraft) {
+ return ietfDraft[1];
+ }
+
+ // Handle IETF individual drafts, stripping group name
+ // TODO: retrieve the list of IETF groups to make sure that the group name
+ // is an actual group name and not the beginning of the shortname:
+ // https://datatracker.ietf.org/api/v1/group/group/
+ // (multiple requests needed due to pagination, "?limit=1000" is the max)
+ const ietfIndDraft = url.match(/\/datatracker\.ietf\.org\/doc\/html\/draft-[^\-]+-([^\/]+)/);
+ if (ietfIndDraft) {
+ if (ietfIndDraft[1].indexOf('-') !== -1) {
+ return ietfIndDraft[1].slice(ietfIndDraft[1].indexOf('-') + 1);
+ }
+ else {
+ return ietfIndDraft[1];
+ }
+ }
+
+ // Handle TAG findings
+ const tag = url.match(/^https?:\/\/(?:www\.)?w3\.org\/2001\/tag\/doc\/([^\/]+)\/?$/);
+ if (tag) {
+ return tag[1];
}
// Return name when one was given
@@ -107,7 +126,7 @@ function computeShortname (url) {
// Latin characters (a-z letters, digits, underscore and "-"), and that it
// only contains a dot for fractional levels at the end of the name
// (e.g. "blah-1.2" is good but "blah.blah" and "blah-3.1-blah" are not)
- if (!name.match(/^[\w-]+((?<=-\d+)\.\d+)?$/)) {
+ if (!name.match(/^[\w\-]+((?<=\-v?\d+)\.\d+)?$/)) {
throw new Error(`Specification name contains unexpected characters: ${name} (extracted from ${url})`);
}
@@ -234,18 +253,21 @@ const matchAnchor = (url, anchor) => link => {
return link === (url + '#' + anchor) || link === (url + '#' + encodeURIComponent(anchor));
};
-function studyBackrefs (edResults, trResults = [], htmlFragments = {}, shortnameFilter) {
- trResults = trResults || [];
+async function studyBackrefs(specs, { crawlResults = null, trResults = [], htmlFragments = null } = {}) {
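+  // Default to the specs under study when no full crawl result is provided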
+ crawlResults = crawlResults ?? specs;
const report = [];
- edResults.forEach(spec => {
- if (shortnameFilter && spec.shortname !== shortnameFilter) return;
- studyLinks(spec, spec.links?.rawlinks, report, edResults, trResults, htmlFragments);
+  // Download the automatic map of multipage anchors in the HTML spec
+ const fragmentsUrl = 'https://html.spec.whatwg.org/multipage/fragment-links.json';
+ htmlFragments = htmlFragments ?? await fetch(fragmentsUrl).then(r => r.json());
+
+ specs.forEach(spec => {
+ studyLinks(spec, spec.links?.rawlinks, report, crawlResults, trResults, htmlFragments);
// given the current limitation of classification of links for bikeshed
// https://github.com/w3c/reffy/issues/1584
// we also check autolinks for bikeshed specs
if (spec.generator === "bikeshed") {
- studyLinks(spec, spec.links?.autolinks, report, edResults, trResults, htmlFragments);
+ studyLinks(spec, spec.links?.autolinks, report, crawlResults, trResults, htmlFragments);
}
});
return report;
@@ -254,7 +276,9 @@ function studyBackrefs (edResults, trResults = [], htmlFragments = {}, shortname
function studyLinks(spec, links, report, edResults, trResults, htmlFragments) {
if (!links) return;
- const recordAnomaly = recordCategorizedAnomaly(report, 'links', possibleAnomalies);
+ function recordAnomaly(spec, name, message) {
+ report.push({ name, message, spec });
+ }
Object.keys(links)
.filter(matchSpecUrl)
@@ -421,22 +445,4 @@ function studyLinks(spec, links, report, edResults, trResults, htmlFragments) {
});
}
-/**************************************************
-Export methods for use as module
-**************************************************/
-export default studyBackrefs;
-
-if (process.argv[1] === fileURLToPath(import.meta.url)) {
- const crawl = await loadCrawlResults(process.argv[2], process.argv[3]);
- let htmlFragments = {};
- try {
- console.info('Downloading HTML spec fragments data…');
- htmlFragments = await fetch('https://html.spec.whatwg.org/multipage/fragment-links.json').then(r => r.json());
- console.info('- done');
- } catch (err) {
- console.error('- failed: could not fetch HTML fragments data, may report false positive broken links on HTML spec');
- }
-
- const results = studyBackrefs(crawl.ed, crawl.tr, htmlFragments, process.argv[4] ?? undefined);
- console.log(results);
-}
+export default studyBackrefs;
\ No newline at end of file
diff --git a/src/lib/study-crawl.js b/src/lib/study-crawl.js
deleted file mode 100644
index be210dc4..00000000
--- a/src/lib/study-crawl.js
+++ /dev/null
@@ -1,412 +0,0 @@
-/**
- * The crawl analyzer takes a crawl report as input and creates a report that
- * contains, for each spec, a list of potential anomalies, such as:
- *
- * 1. specs that do not seem to reference any other spec normatively;
- * 2. specs that define WebIDL terms but do not normatively reference the WebIDL
- * spec;
- * 3. specs that contain invalid WebIDL terms definitions;
- * 4. specs that use obsolete WebIDL constructs (e.g. `[]` instead of
- * `FrozenArray`);
- * 5. specs that define WebIDL terms that are *also* defined in another spec;
- * 6. specs that use WebIDL terms defined in another spec without referencing
- * that spec normatively;
- * 7. specs that use WebIDL terms for which the crawler could not find any
- * definition in any of the specs it studied;
- * 8. specs that link to another spec but do not include a reference to that
- * other spec;
- * 9. specs that link to another spec inconsistently in the body of the document
- * and in the list of references (e.g. because the body of the document
- * references the Editor's draft while the reference is to the latest published
- * version).
- * 10. W3C specs that do not have a known Editor's Draft
- *
- * @module analyzer
- */
-
-import fs from 'node:fs';
-import path from 'node:path';
-import { expandCrawlResult, isLatestLevelThatPasses } from 'reffy';
-import studyBackrefs from './study-backrefs.js';
-import { checkSpecDefinitions } from '../cli/check-missing-dfns.js';
-import { canonicalizeUrl, canonicalizesTo } from "./canonicalize-url.js";
-import loadJSON from './load-json.js';
-
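-// Helpers to concatenate arrays in a reduce() and deduplicate in a filter()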
-const array_concat = (a,b) => a.concat(b);
-const uniqueFilter = (item, idx, arr) => arr.indexOf(item) === idx;
-
-/**
- * Helper function that returns true when the given URL seems to target a real
- * "spec" (as opposed to, say, a Wiki page, or something else)
- */
-const matchSpecUrl = url =>
- url.match(/spec.whatwg.org/) ||
- url.match(/www.w3.org\/TR\/[a-z0-9]/) ||
- (url.match(/w3c.github.io/) && ! url.match(/w3c.github.io\/test-results\//));
-
-
-/**
- * Compares specs for ordering by title
- */
-const byTitle = (a, b) =>
- (a.title || '').toUpperCase().localeCompare((b.title || '').toUpperCase());
-
-
-/**
- * Returns true when the given error array is not set or does not contain any
- * error.
- */
-function isOK(errors) {
- return !errors || (errors.length === 0);
-}
-
-
-/**
- * Filter out spec info parameters that are not needed when the spec is to
- * appear as a reference in the final report, to keep the JSON report somewhat
- * readable.
- *
- * @function
- * @param {Object} spec The spec info to filter, typically the spec object
- * contained in the results of a crawl.
- * @return {Object} A new spec object that only contains the URL, title, the
- * URL that was crawled.
- */
-function filterSpecInfo(spec) {
- return {
- url: spec.url,
- title: spec.title,
- crawled: spec.crawled
- };
-}
-
-
-/**
- * Analyze the result of a crawl and produce a report that can easily be
- * converted without more processing to a human readable version.
- *
- * @function
- * @param {Array(Object)} results A crawl result, one entry per spec
- * @param {Object} options Study options; set the "include" property to an
- * optional list of specs to include in the report. All specs are included
- * by default.
- * @return {Array(Object)} A report, one entry per spec, each spec will have
- * a "report" property with "interesting" properties, see code comments inline
- * for details
- */
-async function studyCrawlResults(results, options = {}) {
- const knownIdlNames = results
- .map(r => r.idlparsed?.idlNames ? Object.keys(r.idlparsed.idlNames) : [], [])
- .reduce(array_concat)
- .filter(uniqueFilter);
- const knownGlobalNames = results
- .map(r => r.idlparsed?.globals ? Object.keys(r.idlparsed.globals) : [], [])
- .reduce(array_concat)
- .filter(uniqueFilter);
- const idlNamesIndex = {};
- knownIdlNames.forEach(name =>
- idlNamesIndex[name] = results.filter(spec =>
- isLatestLevelThatPasses(spec, results, s =>
- s.idlparsed?.idlNames?.[name])));
-
- // WebIDL-1 only kept for historical reasons to process old crawl results
- const WebIDLSpec = results.find(spec =>
- spec.shortname === 'webidl' || spec.shortname === 'WebIDL-1') || {};
-
- const sortedResults = results.sort(byTitle);
-
- // Construct spec equivalence from the crawl report, which should be more
- // complete than the initial equivalence list.
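-  // (each entry maps a version URL to the current spec URL, or to an array
-  // of URLs when several specs share a version)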
- const specEquivalents = {};
- sortedResults.forEach(spec =>
- spec.versions.forEach(v => {
- if (specEquivalents[v]) {
- if (Array.isArray(specEquivalents[v])) {
- specEquivalents[v].push(spec.url);
- }
- else {
- specEquivalents[v] = [specEquivalents[v], spec.url];
- }
- }
- else {
- specEquivalents[v] = spec.url;
- }
- }
- ));
-
- // Strong canonicalization options to find references
- var useEquivalents = {
- datedToLatest: true,
- equivalents: specEquivalents
- };
-
- const xrefsReport = studyBackrefs(sortedResults, options.trResults);
-
- const specsToInclude = options.include;
- return Promise.all(sortedResults
- .filter(spec => !specsToInclude ||
- (specsToInclude.length === 0) ||
- specsToInclude.some(toInclude =>
- toInclude === spec.shortname ||
- toInclude === spec.series?.shortname ||
- toInclude === spec.url ||
- toInclude === spec.crawled ||
- toInclude === spec.nightly?.url ||
- toInclude.shortname === spec.shortname ||
- toInclude.shortname === spec.series?.shortname ||
- (toInclude.url && toInclude.url === spec.url) ||
- (toInclude.url && toInclude.url === spec.crawled) ||
- (toInclude.url && toInclude.url === spec.nightly?.url) ||
- (toInclude.html && toInclude.html === spec.html)))
- .map(async spec => {
- spec.idlparsed = spec.idlparsed || {};
- spec.css = spec.css || {};
- spec.refs = spec.refs || {};
- spec.links = spec.links || {};
- const idlDfns = spec.idlparsed.idlNames ?
- Object.keys(spec.idlparsed.idlNames) : [];
- const idlExtendedDfns = spec.idlparsed.idlExtendedNames ?
- Object.keys(spec.idlparsed.idlExtendedNames) : [];
- const idlDeps = spec.idlparsed.externalDependencies ?
- spec.idlparsed.externalDependencies : [];
- const exposed = spec.idlparsed.exposed ? Object.keys(spec.idlparsed.exposed) : [];
-
- const xrefs = xrefsReport[spec.url];
- if (xrefs) {
- // The backrefs analysis tool includes the spec's title in its
- // report, which we already have at the top level.
- delete xrefs.title;
-
- // The backrefs analysis tool also includes a list of documents
- // that look like specs but that are not crawled. That is not
- // an anomaly with the spec but rather a list of potential specs
- // to be included in browser-specs. They should be treated
- // separately.
- delete xrefs.unknownSpecs;
- }
-
- const report = {
- // An error at this level means the spec could not be parsed at all
- error: spec.error,
-
- // Whether the crawler found normative references
- // (most specs should have)
- noNormativeRefs: !spec.refs.normative ||
- (spec.refs.normative.length === 0),
-
- // Whether the spec normatively references the WebIDL spec
- // (all specs that define IDL content should)
- noRefToWebIDL: (spec !== WebIDLSpec) &&
- (spec.idlparsed.bareMessage || (idlDfns.length > 0) || (idlExtendedDfns.length > 0)) &&
- (!spec.refs.normative || !spec.refs.normative.find(ref =>
- ref.name.match(/^WebIDL/i) ||
- (ref.url === WebIDLSpec.url) ||
- (WebIDLSpec.nightly && (ref.url === WebIDLSpec.nightly.url)))),
-
- // Whether the spec has invalid IDL content
- // (the crawler cannot do much when IDL content is invalid, it
- // cannot tell what IDL definitions and references the spec
- // contains in particular)
- hasInvalidIdl: !!(!spec.idlparsed.idlNames && spec.idlparsed.bareMessage),
-
- // Whether the spec uses IDL constructs that were valid in
- // WebIDL Level 1 but no longer are, typically "[]" instead of
- // "FrozenArray"
- hasObsoleteIdl: spec.idlparsed.hasObsoleteIdl,
-
- // List of Exposed names used in the spec that we know nothing
- // about because we cannot find a matching "Global" name in
- // any other spec
- unknownExposedNames: exposed
- .filter(name => !knownGlobalNames.includes(name) && name !== "*")
- .sort(),
-
- // List of IDL names used in the spec that we know nothing about
- // (for instance because of some typo or because the term is
- // defined in a spec that has not been crawled or that could
- // not be parsed)
- unknownIdlNames: idlDeps
- .filter(name => knownIdlNames.indexOf(name) === -1)
- .sort(),
-
- // List of IDL definitions that are already defined in some
- // other crawled spec
- // (this should not happen, ideally)
- redefinedIdlNames: idlDfns
- .filter(name => (idlNamesIndex[name].length > 1))
- .map(name => {
- return {
- name,
- refs: idlNamesIndex[name].filter(ref => (ref.url !== spec.url)).map(filterSpecInfo)
- };
- }),
-
- // List of IDL names used in the spec that are defined in some
- // other spec, and which do not seem to appear in the list of
- // normative references
- // (There should always be an entry in the normative list of
- // references that links to that other spec)
- // NB: "Exposed=Window", which would in theory trigger the need
- // to add a normative reference to HTML, is considered to be
- // an exception to the rule, and ignored.
- missingWebIdlRef: idlDeps
- .filter(name => knownIdlNames.indexOf(name) !== -1)
- .map(name => {
- const refs = idlNamesIndex[name].map(filterSpecInfo);
- let ref = null;
- if (spec.refs && spec.refs.normative) {
- ref = refs.find(s => {
- const canon = canonicalizeUrl(s.url, useEquivalents);
- return !!spec.refs.normative.find(r =>
- canonicalizesTo(r.url, canon, useEquivalents));
- });
- }
- return (ref ? null : { name, refs });
- })
- .filter(i => !!i),
-
- // CSS/IDL terms that do not have a corresponding dfn in the
- // specification
- missingDfns: await checkSpecDefinitions(spec),
-
- // Links to external specifications within the body of the spec
- // that do not have a corresponding entry in the references
- // (all links to external specs should have a companion ref)
- missingLinkRef: Object.keys(spec.links.rawlinks || {})
- .filter(matchSpecUrl)
- .filter(l => {
- // Filter out "good" and "inconsistent" references
- const canon = canonicalizeUrl(l, useEquivalents);
- const refs = (spec.refs.normative || []).concat(spec.refs.informative || []);
- return !refs.find(r => canonicalizesTo(r.url, canon, useEquivalents));
- })
- .filter(l =>
- // Ignore links to other versions of "self". There may
- // be cases where it would be worth reporting them but
- // most of the time they appear in "changelog" sections.
- !canonicalizesTo(l, spec.url, useEquivalents) &&
- !canonicalizesTo(l, spec.versions, useEquivalents)
- ),
-
- // Links to external specifications within the body of the spec
- // that have a corresponding entry in the references, but for
- // which the reference uses a different URL, e.g. because the
- // link targets the Editor's Draft, whereas the reference
- // targets the latest published version
- inconsistentRef: Object.keys(spec.links.rawlinks || {})
- .filter(matchSpecUrl)
- .map(l => {
- const canonSimple = canonicalizeUrl(l);
- const canon = canonicalizeUrl(l, useEquivalents);
- const refs = (spec.refs.normative || []).concat(spec.refs.informative || []);
-
- // Filter out "good" references
- if (refs.find(r => canonicalizesTo(r.url, canonSimple))) {
- return null;
- }
- const ref = refs.find(r => canonicalizesTo(r.url, canon, useEquivalents));
- return (ref ? { link: l, ref } : null);
- })
- .filter(l => !!l),
-
- // Lists of specs present in the crawl report that reference
- // the current spec, either normatively or informatively
- // (used to produce the dependencies report)
- referencedBy: {
- normative: sortedResults
- .filter(s =>
- s.refs && s.refs.normative &&
- s.refs.normative.find(r =>
- canonicalizesTo(r.url, spec.url, useEquivalents) ||
- canonicalizesTo(r.url, spec.versions, useEquivalents)))
- .map(filterSpecInfo),
- informative: sortedResults
- .filter(s =>
- s.refs && s.refs.informative &&
- s.refs.informative.find(r =>
- canonicalizesTo(r.url, spec.url, useEquivalents) ||
- canonicalizesTo(r.url, spec.versions, useEquivalents)))
- .map(filterSpecInfo)
- },
-
- // Analysis of cross-references to other specs
- xrefs: xrefsReport[spec.url]
- };
-
- // A spec is OK if it does not contain anything "suspicious".
- report.ok = !report.error &&
- !report.noNormativeRefs &&
- !report.hasInvalidIdl &&
- !report.hasObsoleteIdl &&
- !report.noRefToWebIDL &&
- !report.missingDfns.obsoleteDfnsModel &&
- isOK(report.unknownIdlNames) &&
- isOK(report.redefinedIdlNames) &&
- isOK(report.missingWebIdlRef) &&
- isOK(report.missingDfns.css.filter(r => !r.warning)) &&
- isOK(report.missingDfns.idl.filter(r => !r.warning)) &&
- isOK(report.missingLinkRef) &&
- isOK(report.inconsistentRef) &&
- (!report.xrefs || (
- isOK(report.xrefs.notExported) &&
- isOK(report.xrefs.notDfn) &&
- isOK(report.xrefs.brokenLinks) &&
- isOK(report.xrefs.evolvingLinks) &&
- isOK(report.xrefs.outdatedSpecs) &&
- isOK(report.xrefs.datedUrls)));
-
- const res = {
- title: spec.title || spec.url,
- shortname: spec.shortname,
- date: spec.date,
- url: spec.url,
- release: spec.release,
- nightly: spec.nightly,
- crawled: spec.crawled,
- organization: spec.organization,
- groups: spec.groups,
- report
- };
- return res;
- }));
-}
-
-async function studyCrawl(crawlResults, options = {}) {
- if (typeof crawlResults === 'string') {
- const crawlResultsPath = crawlResults;
- crawlResults = await loadJSON(crawlResults);
- crawlResults = await expandCrawlResult(crawlResults, path.dirname(crawlResultsPath));
- }
- else {
- crawlResults = crawlResults || {};
- }
- crawlResults.results = crawlResults.results || [];
- crawlResults.stats = crawlResults.stats || {};
-
- if (typeof options.trResults === 'string') {
- const crawlResultsPath = options.trResults;
- options.trResults = await loadJSON(options.trResults);
- options.trResults = await expandCrawlResult(options.trResults, path.dirname(crawlResultsPath));
- options.trResults = options.trResults.results;
- }
-
- const results = await studyCrawlResults(crawlResults.results, options);
-
- return {
- type: 'study',
- title: crawlResults.title || 'Web specs analysis',
- description: crawlResults.description || '',
- date: crawlResults.date || (new Date()).toJSON(),
- stats: {
- crawled: crawlResults.stats.crawled || crawlResults.results.length,
- errors: crawlResults.stats.errors || crawlResults.results.filter(spec => !!spec.error).length,
- studied: results.length || crawlResults.stats.crawled
- },
- results: results
- };
-}
-
-
-/**************************************************
-Export methods for use as module
-**************************************************/
-export default studyCrawl;
diff --git a/src/cli/check-missing-dfns.js b/src/lib/study-dfns.js
similarity index 69%
rename from src/cli/check-missing-dfns.js
rename to src/lib/study-dfns.js
index be298a75..0b00e5f4 100644
--- a/src/cli/check-missing-dfns.js
+++ b/src/lib/study-dfns.js
@@ -1,21 +1,8 @@
-#!/usr/bin/env node
/**
* The definitions checker compares CSS, dfns, and IDL extracts created by Reffy
* to detect CSS/IDL terms that do not have a corresponding dfn in the
* specification.
*
- * The definitions checker can be called directly through:
- *
- * `node check-missing-dfns.js [crawl report] [spec] [format]`
- *
- * where:
- * - `crawl report` is the local path to the root folder that contains the
- * `index.json` and the extracts (e.g. `reports/ed`)
- * - `spec` is the optional shortname of the specification on which to focus or
- * `all` (default) to check all specs
- * - `format` is the optional output format. Either `json` or `markdown` with
- * `markdown` being the default.
- *
* Note: CSS extraction already relies on dfns and reports missing dfns in a
* "warnings" property. This checker simply looks at that list.
*
@@ -100,7 +87,7 @@ function matchCSSDfn(expected, actual) {
*
* @function
* @private
- * @param {Object} css The root of the object that describes IDL terms in the
+ * @param {Object} idl The root of the object that describes IDL terms in the
* `idlparsed` extract.
* @return {Array} An array of expected definitions
*/
@@ -121,7 +108,7 @@ function getExpectedDfnsFromIdl(idl = {}) {
/**
* Return true if the given parsed IDL object describes a default toJSON
* operation that references:
- * https://heycam.github.io/webidl/#default-tojson-steps
+ * https://webidl.spec.whatwg.org/#default-tojson-steps
*
* @function
* @private
@@ -140,9 +127,9 @@ function isDefaultToJSONOperation(desc) {
*
* @function
* @public
- * @param {Object} desc The object that describes the IDL term in the
+ * @param {Object} idl The object that describes the IDL term in the
* `idlparsed` extract.
- * @param {Object} parentDesc (optional) The object that describes the parent
+ * @param {Object} parentIdl (optional) The object that describes the parent
* IDL term of the term to parse (used to compute the `for` property).
* @return {Object} The expected definition, or null if no expected definition
* is defined.
@@ -318,7 +305,7 @@ function getExpectedDfnsFromIdlDesc(idl, {excludeRoot} = {excludeRoot: false}) {
*
* The function works around Respec's issue #3200 for methods and constructors
* that take only optional parameters:
- * https://github.com/w3c/respec/issues/3200
+ * https://github.com/speced/respec/issues/3200
*
* @function
* @private
@@ -352,29 +339,18 @@ function matchIdlDfn(expected, actual,
* @function
* @public
* @param {Object} spec Crawl result for the spec to parse
- * @param {String} options Check options. Set the rootFolder property to the
- * root folder against which to resolve relative paths to load CSS/IDL
- * extracts (only needed if the extracts have not yet been loaded and attached
- * to the spec object). Set the includeObsolete property to true to include
- * detailed results about specs that use an obsolete dfns data model.
* @return {Object} An object with a css and idl property, each of them holding
* an array of missing CSS or IDL definitions. The function returns null when
* there are no missing definitions.
*/
-async function checkSpecDefinitions(spec, options = {}) {
- if (!options.includeObsolete && specsWithObsoleteDfnsModel.includes(spec.shortname)) {
+function checkSpecDefinitions(spec) {
+ if (specsWithObsoleteDfnsModel.includes(spec.shortname)) {
return { obsoleteDfnsModel: true };
}
- const dfns = (typeof spec.dfns === "string") ?
- (await loadJSON(path.resolve(options.rootFolder, spec.dfns))).dfns :
- (spec.dfns || []);
- const css = (typeof spec.css === "string") ?
- (await loadJSON(path.resolve(options.rootFolder, spec.css))) :
- (spec.css || {});
- const idl = (typeof spec.idlparsed === "string") ?
- (await loadJSON(path.resolve(options.rootFolder, spec.idlparsed))).idlparsed :
- spec.idlparsed;
+ const dfns = spec.dfns ?? [];
+ const css = spec.css ?? {};
+ const idl = spec.idlparsed ?? {};
// Make sure that all expected CSS definitions exist in the dfns extract
const expectedCSSDfns = getExpectedDfnsFromCSS(css);
@@ -455,134 +431,50 @@ async function checkSpecDefinitions(spec, options = {}) {
/**
- * Checks the CSS and IDL extracts against the dfns extract for all specs in
- * the report.
- *
- * @function
- * @public
- * @param {String} pathToReport Path to the root folder that contains the
- * `index.json` report file and the extracts subfolders.
- * @param {Object} options Check options. Set the "shortname" property to a
- * spec's shortname to only check that spec.
- * @return {Array} The list of specifications along with dfn problems that have
- * been identified. Each entry has `url`, 'crawled`, `shortname` properties to
- * identify the specification, and a `missing` property that is an object that
- * may have `css` and `idl` properties which list missing CSS/IDL definitions.
- */
-async function checkDefinitions(pathToReport, options = {}) {
- const rootFolder = path.resolve(process.cwd(), pathToReport);
- const index = (await loadJSON(path.resolve(rootFolder, 'index.json'))).results;
-
- // Check all dfns against CSS and IDL extracts
- const checkOptions = {
- rootFolder,
- includeObsolete: !!options.shortname
- };
- const missing = await Promise.all(
- index
- .filter(spec => !options.shortname || spec.shortname === options.shortname)
- .map(async spec => {
- const res = {
- url: spec.url,
- crawled: spec.crawled,
- shortname: spec.shortname,
- };
- if (!spec.dfns) {
- return res;
- }
- res.missing = await checkSpecDefinitions(spec, checkOptions);
- return res;
- })
- );
-
- return missing;
-}
-
-
-/**
- * Report missing dfn to the console as Markdown
+ * Format the anomaly message to report as Markdown
*
* @function
* @private
* @param {Object} missing Object that describes missing dfn
*/
-function reportMissing(missing) {
+function formatAnomalyMessage(missing) {
const exp = missing.expected;
const found = missing.found;
const foundFor = (found && found.for && found.for.length > 0) ?
' for ' + found.for.map(f => `\`${f}\``).join(',') :
'';
- console.log(`- \`${exp.linkingText[0]}\` ${exp.type ? `with type \`${exp.type}\`` : ''}` +
+ return '`' + exp.linkingText[0] + '` ' +
+ (exp.type ? `with type \`${exp.type}\`` : '') +
(missing.for ? ` for [\`${missing.for.linkingText[0]}\`](${missing.for.href})` : '') +
- (found ? `, but found [\`${found.linkingText[0]}\`](${found.href}) with type \`${found.type}\`${foundFor}` : ''));
+ (found ? `, but found [\`${found.linkingText[0]}\`](${found.href}) with type \`${found.type}\`${foundFor}` : '');
}
-/**************************************************
-Export methods for use as module
-**************************************************/
-export {
- checkSpecDefinitions,
- checkDefinitions,
-
- // "Inner" functions that the IDL names generator uses to link IDL terms with
- // their definition (see generate-idlnames.js)
- getExpectedDfnFromIdlDesc,
- matchIdlDfn
-}
-
-
-/**************************************************
-Code run if the code is run as a stand-alone module
-**************************************************/
-if (process.argv[1] === fileURLToPath(import.meta.url)) {
- const pathToReport = process.argv[2];
- const shortname = process.argv[3] || 'all';
- const format = process.argv[4] || 'markdown';
-
- const options = (shortname === 'all') ? undefined : { shortname };
- let res = await checkDefinitions(pathToReport, options);
- if (shortname === 'all') {
- res = res
- .filter(result => result.missing &&
- !result.missing.obsoleteDfnsModel &&
- ((result.missing.css.length > 0) || (result.missing.idl.length > 0)));
- }
-
- if (format === 'json') {
- console.log(JSON.stringify(res, null, 2));
- }
- else {
- res.forEach(result => {
- const missing = result.missing || {css: [], idl: []};
- const errors = ['css', 'idl']
- .map(type => result.missing[type].filter(missing => !missing.warning))
- .flat();
- const warnings = ['css', 'idl']
- .map(type => result.missing[type].filter(missing => missing.warning))
- .flat();
- console.log('');
-      console.log(`${result.shortname} (${errors.length} errors, ${warnings.length} warnings)`);
- console.log();
- if (errors.length === 0 && warnings.length === 0) {
- console.log('All good!');
- }
- if (errors.length > 0) {
- console.log('');
-        console.log(`Errors (${errors.length})`);
- console.log();
- errors.forEach(reportMissing);
- console.log(' ');
- }
- if (warnings.length > 0) {
- console.log('');
-        console.log(`Warnings (${warnings.length})`);
- console.log();
- warnings.forEach(reportMissing);
- console.log(' ');
+/**
+ * Checks the CSS and IDL extracts against the dfns extract for all specs in
+ * the report, and return a list of missing definitions.
+ *
+ * @function
+ * @public
+ */
+export default function studyDefinitions(specs) {
+ return specs
+ .map(spec => {
+ const missing = checkSpecDefinitions(spec);
+ const res = [];
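+      // Specs that use the obsolete dfns data model are skipped entirely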
+ if (!missing.obsoleteDfnsModel) {
+ for (const type of ['css', 'idl']) {
+ const anomalies = missing[type];
+ for (const anomaly of anomalies) {
+ res.push({
+ name: 'missingDfns',
+ message: formatAnomalyMessage(anomaly),
+ spec
+ });
+ }
}
- console.log(' ');
- console.log();
- })
- }
-}
\ No newline at end of file
+ }
+ return res;
+ })
+ .flat();
+}
diff --git a/src/lib/study-refs.js b/src/lib/study-refs.js
index f7542486..19bc99c6 100644
--- a/src/lib/study-refs.js
+++ b/src/lib/study-refs.js
@@ -1,31 +1,116 @@
-import { loadCrawlResults, recordCategorizedAnomaly } from './util.js';
-import { fileURLToPath } from 'node:url';
+import { canonicalizeUrl, canonicalizesTo } from './canonicalize-url.js';
-const possibleAnomalies = [
- 'discontinuedReferences'
-];
+/**
+ * Helper function that returns true when the given URL seems to target a real
+ * "spec" (as opposed to, say, a Wiki page, or something else)
+ */
+const matchSpecUrl = url =>
+ url.match(/spec.whatwg.org/) ||
+ url.match(/www.w3.org\/TR\/[a-z0-9]/) ||
+ (url.match(/w3c.github.io/) && ! url.match(/w3c.github.io\/test-results\//));
-function studyReferences (edResults) {
- const report = [];
- const recordAnomaly = recordCategorizedAnomaly(report, 'refs', possibleAnomalies);
- edResults.forEach(spec => {
- (spec.refs?.normative || []).forEach(ref => {
- const referencedSpec = edResults.find(s => s.url === ref.url || s?.nightly?.url === ref.url || s?.nightly?.alternateUrls?.includes(ref.url));
+function studyReferences (specs, { crawlResults = null } = {}) {
+ crawlResults = crawlResults ?? specs;
- if (referencedSpec && referencedSpec.standing === "discontinued") {
+ // Construct spec equivalence from the crawl report
+ const specEquivalents = {};
+ for (const spec of crawlResults) {
+ for (const v of (spec.versions ?? [])) {
+ if (specEquivalents[v]) {
+ if (Array.isArray(specEquivalents[v])) {
+ specEquivalents[v].push(spec.url);
+ }
+ else {
+ specEquivalents[v] = [specEquivalents[v], spec.url];
+ }
+ }
+ else {
+ specEquivalents[v] = spec.url;
+ }
+ }
+ }
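+ // For illustration, an entry may map a dated URL such as
+ // "https://www.w3.org/TR/2023/WD-example-20230101/" (hypothetical) to the
+ // spec URL "https://www.w3.org/TR/example/", or to an array of spec URLs
+ // when several specs claim the same version.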
+
+ // Strong canonicalization options to find references
+ const useEquivalents = {
+ datedToLatest: true,
+ equivalents: specEquivalents
+ };
- const newSpecsLinks = edResults.filter(s => referencedSpec.obsoletedBy?.includes(s.shortname)).map(s => `[${s.shortname}](${s?.nightly?.url || s.url})`);
- recordAnomaly(spec, 'discontinuedReferences', `[${ref.name}](${ref.url}) ${newSpecsLinks.length ? `has been obsoleted by ${newSpecsLinks}` : `is discontinued, no known replacement reference`}`);
+ const report = [];
+ for (const spec of specs) {
+ for (const ref of spec.refs?.normative ?? []) {
+ const referencedSpec = crawlResults.find(s =>
+ s.url === ref.url ||
+ s?.nightly?.url === ref.url ||
+ s?.nightly?.alternateUrls?.includes(ref.url));
+ if (referencedSpec && referencedSpec.standing === "discontinued") {
+ const newSpecsLinks = crawlResults
+ .filter(s => referencedSpec.obsoletedBy?.includes(s.shortname))
+ .map(s => `[${s.shortname}](${s?.nightly?.url || s.url})`);
+ report.push({
+ name: 'discontinuedReferences',
+ message: `[${ref.name}](${ref.url}) ${newSpecsLinks.length ? `has been obsoleted by ${newSpecsLinks}` : `is discontinued, no known replacement reference`}`,
+ spec
+ });
}
- });
- });
+ }
+
+ // Detect links to external specifications within the body of the spec
+ // that do not have a corresponding entry in the list of references
+ // (all links to external specs should have a companion ref)
+ Object.keys(spec.links?.rawlinks ?? {})
+ .filter(matchSpecUrl)
+ .filter(l => {
+ // Filter out "good" and "inconsistent" references
+ const canon = canonicalizeUrl(l, useEquivalents);
+ const refs = (spec.refs?.normative ?? []).concat(spec.refs?.informative ?? []);
+ return !refs.find(r => canonicalizesTo(r.url, canon, useEquivalents));
+ })
+ .filter(l =>
+ // Ignore links to other versions of "self". There may
+ // be cases where it would be worth reporting them but
+ // most of the time they appear in "changelog" sections.
+ !canonicalizesTo(l, spec.url, useEquivalents) &&
+ !canonicalizesTo(l, spec.versions, useEquivalents)
+ )
+ .forEach(l => {
+ report.push({
+ name: 'missingReferences',
+ message: l,
+ spec
+ });
+ });
+
+ // Detect links to external specifications within the body of the spec
+ // that have a corresponding entry in the references, but for which the
+ // reference uses a different URL, e.g., because the link targets the
+ // Editor's Draft, whereas the reference targets the latest published
+ // version
+ Object.keys(spec.links?.rawlinks ?? {})
+ .filter(matchSpecUrl)
+ .map(l => {
+ const canonSimple = canonicalizeUrl(l);
+ const canon = canonicalizeUrl(l, useEquivalents);
+ const refs = (spec.refs?.normative ?? [])
+ .concat(spec.refs?.informative ?? []);
+
+ // Filter out "good" references
+ if (refs.find(r => canonicalizesTo(r.url, canonSimple))) {
+ return null;
+ }
+ const ref = refs.find(r => canonicalizesTo(r.url, canon, useEquivalents));
+ return (ref ? { link: l, ref } : null);
+ })
+ .filter(anomaly => !!anomaly)
+ .forEach(anomaly => {
+ report.push({
+ name: 'inconsistentReferences',
+ message: `${anomaly.link}, related reference "${anomaly.ref.name}" uses URL ${anomaly.ref.url}`,
+ spec
+ });
+ });
+ }
return report;
}
export default studyReferences;
-
-if (process.argv[1] === fileURLToPath(import.meta.url)) {
- const crawl = await loadCrawlResults(process.argv[2]);
- const results = studyReferences(crawl.ed);
- console.log(results);
-}
diff --git a/src/lib/study-webidl.js b/src/lib/study-webidl.js
index fc09365f..f5711326 100644
--- a/src/lib/study-webidl.js
+++ b/src/lib/study-webidl.js
@@ -5,52 +5,26 @@
* object structure:
*
* {
- * "category": "webidl",
* "name": "type of anomaly",
* "message": "Description of the anomaly",
- * "specs": [
- * { spec that contains or triggers the anomaly },
- * { another spec that contains or triggers the anomaly },
- * ...
- * ]
+ * "spec": { spec that contains or triggers the anomaly }
* }
+ *
+ * Some anomalies may be associated with more than one spec, when the code
+ * cannot tell which spec needs fixing (e.g., when checking duplicates while
+ * merging partials). In such cases, the `spec` property is replaced by a
+ * `specs` property that contains an array of specs.
*
- * All anomalies will be associated with at least one spec (so specs.length > 0)
- * but some of them may be associated with more than one, when the code cannot
- * tell which of them needs to be fixed (e.g. when checking duplicates while
- * merging partials).
- *
- * The spec object returned in the "specs" array is the spec object provided in
- * the crawl results parameter.
+ * The spec object returned in the `spec` and `specs` properties is the spec
+ * object provided in the crawl results parameter.
*/
-import { recordCategorizedAnomaly } from './util.js';
import * as WebIDL2 from 'webidl2';
const getSpecs = list => [...new Set(list.map(({ spec }) => spec))];
const specName = spec => spec.shortname ?? spec.url;
const dfnName = dfn => `${dfn.idl.partial ? 'partial ' : ''}${dfn.idl.type} "${dfn.idl.name}"`;
-const possibleAnomalies = [
- 'incompatiblePartialIdlExposure',
- 'invalid',
- 'noExposure',
- 'noOriginalDefinition',
- 'overloaded',
- 'redefined',
- 'redefinedIncludes',
- 'redefinedMember',
- 'redefinedWithDifferentTypes',
- 'singleEnumValue',
- 'unexpectedEventHandler',
- 'unknownExposure',
- 'unknownExtAttr',
- 'unknownType',
- 'wrongCaseEnumValue',
- 'wrongKind',
- 'wrongType'
-];
-
const basicTypes = new Set([
// Types defined by Web IDL itself:
'any', // https://webidl.spec.whatwg.org/#idl-any
@@ -192,7 +166,7 @@ function describeMember (member) {
return desc;
}
-function studyWebIdl (edResults, curatedResults) {
+function studyWebIdl (specs, { crawlResults = [], curatedResults = [] } = {}) {
const report = []; // List of anomalies to report
const dfns = {}; // Index of IDL definitions (save includes)
const includesStatements = {}; // Index of "includes" statements
@@ -200,8 +174,21 @@ function studyWebIdl (edResults, curatedResults) {
const usedTypes = {}; // Index of types used in the IDL
const usedExtAttrs = {}; // Index of extended attributes
- // Record an anomaly for the given spec(s).
- const recordAnomaly = recordCategorizedAnomaly(report, 'webidl', possibleAnomalies);
+ // Record an anomaly for the given spec(s),
+ // provided we are indeed interested in the results
+ function recordAnomaly (spec, name, message) {
+ if (Array.isArray(spec)) {
+ const filtered = spec.filter(sp => specs.find(s => s.shortname === sp.shortname));
+ if (filtered.length > 0) {
+ report.push({ name, message, specs: filtered });
+ }
+ }
+ else {
+ if (specs.find(s => s.shortname === spec.shortname)) {
+ report.push({ name, message, spec });
+ }
+ }
+ }
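+ // For example, recordAnomaly(spec, 'invalid', 'Web IDL parsing failed')
+ // pushes an entry only when the spec is among those under study
+ // (illustrative message).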
function inheritsFrom (iface, ancestor) {
if (!iface.inheritance) return false;
@@ -397,7 +384,11 @@ function studyWebIdl (edResults, curatedResults) {
}
}
- edResults
+ // We need to run the analysis on all specs, even if the caller is only
+ // interested in a few of them, because types may be defined in specs that
+ // the caller is not interested in.
+ const allSpecs = (crawlResults.length > 0) ? crawlResults : specs;
+ allSpecs
// We're only interested in specs that define Web IDL content
.filter(spec => !!spec.idl)
@@ -666,7 +657,4 @@ function studyWebIdl (edResults, curatedResults) {
return report;
}
-/**************************************************
-Export methods for use as module
-**************************************************/
export default studyWebIdl;
diff --git a/src/lib/study.js b/src/lib/study.js
new file mode 100644
index 00000000..7f6264ea
--- /dev/null
+++ b/src/lib/study.js
@@ -0,0 +1,618 @@
+import studyDfns from './study-dfns.js';
+import studyAlgorithms from './study-algorithms.js';
+import studyBackrefs from './study-backrefs.js';
+import studyRefs from './study-refs.js';
+import studyWebIdl from './study-webidl.js';
+import isInMultiSpecRepository from './is-in-multi-spec-repo.js';
+import { recordCategorizedAnomaly } from './util.js';
+
+/**
+ * List of anomalies, grouped per study function
+ */
+const anomalyGroups = [
+ {
+ name: 'generic',
+ title: 'Generic',
+ description: 'The following errors prevented the spec from being analyzed',
+ types: [
+ {
+ name: 'error',
+ title: 'Crawl error',
+ description: 'The following crawl errors occurred'
+ }
+ ],
+ study: (specs) => specs
+ .filter(spec => !!spec.error)
+ .map(spec => ({ name: 'error', message: spec.error, spec }))
+ },
+
+ {
+ name: 'dfns',
+ title: 'Problems with definitions',
+ description: 'The following problems were identified in term definitions',
+ types: [
+ {
+ name: 'missingDfns',
+ title: 'Missing definitions',
+ description: 'The following constructs were found without a definition'
+ }
+ ],
+ study: studyDfns
+ },
+
+ {
+ name: 'backrefs',
+ title: 'Problems with links to other specs',
+ description: 'The following problems were identified when analyzing links to other specifications',
+ types: [
+ {
+ name: 'brokenLinks',
+ title: 'Broken links',
+ description: 'The following links to other specifications were detected as pointing to non-existing anchors'
+ },
+ {
+ name: 'datedUrls',
+ title: 'Links to dated TR URLs',
+ description: 'The following links target a dated version of a specification'
+ },
+ {
+ name: 'evolvingLinks',
+ title: 'Links to now gone anchors',
+ description: 'The following links in the specification link to anchors that no longer exist in the Editor\'s Draft of the targeted specification'
+ },
+ {
+ name: 'frailLinks',
+ title: 'Unstable link anchors',
+ description: 'The following links in the specification link to anchors that either have a new name or are inherently brittle'
+ },
+ {
+ name: 'nonCanonicalRefs',
+ title: 'Non-canonical links',
+ description: 'The following links were detected as pointing to outdated URLs'
+ },
+ {
+ name: 'notDfn',
+ title: 'Links to unofficial anchors',
+ description: 'The following links were detected as pointing to anchors that are neither definitions nor headings in the targeted specification'
+ },
+ {
+ name: 'notExported',
+ title: 'Links to non-exported definitions',
+ description: 'The following links were detected as pointing to a private definition in the targeted specification'
+ },
+ {
+ name: 'outdatedSpecs',
+ title: 'Outdated references',
+ description: 'The following links were detected as pointing to outdated specifications'
+ },
+ {
+ name: 'unknownSpecs',
+ title: 'Links to unknown specs',
+ description: 'The following links were detected as pointing to documents that are not recognized as specifications'
+ }
+ ],
+ study: studyBackrefs,
+ studyParams: ['tr']
+ },
+
+ {
+ name: 'algorithms',
+ title: 'Problems with algorithms',
+ description: 'The following problems were identified when analyzing algorithms',
+ types: [
+ {
+ name: 'missingTaskForPromise',
+ title: 'Missing tasks in parallel steps to handle a promise',
+ description: 'The following algorithms resolve or reject a Promise within a step that runs [in parallel](https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel) without first queuing a task'
+ },
+ {
+ name: 'missingTaskForEvent',
+ title: 'Missing tasks in parallel steps to fire an event',
+ description: 'The following algorithms fire an event within a step that runs [in parallel](https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel) without first queuing a task'
+ }
+ ],
+ study: studyAlgorithms
+ },
+
+ {
+ name: 'refs',
+ title: 'Problems with references',
+ description: 'The following problems were identified when analyzing the list of references',
+ types: [
+ {
+ name: 'discontinuedReferences',
+ title: 'Normative references to discontinued specs',
+ description: 'The following normative references were detected as pointing to discontinued specifications'
+ },
+ {
+ name: 'missingReferences',
+ title: 'Missing references',
+ description: 'The following links target specifications that are not mentioned in the list of references'
+ },
+ {
+ name: 'inconsistentReferences',
+ title: 'Inconsistent reference links',
+ description: 'The following links use a different URL for the targeted specification from the URL defined in the references'
+ }
+ ],
+ study: studyRefs
+ },
+
+ {
+ name: 'webidl',
+ title: 'Web IDL problems',
+ description: 'The following Web IDL problems were identified',
+ types: [
+ { name: 'incompatiblePartialIdlExposure', title: 'Incompatible `[Exposed]` attribute in partial definitions' },
+ { name: 'invalid', title: 'Invalid Web IDL' },
+ { name: 'noExposure', title: 'Missing `[Exposed]` attributes' },
+ { name: 'noOriginalDefinition', title: 'Missing base interfaces' },
+ { name: 'overloaded', title: 'Invalid overloaded operations' },
+ { name: 'redefined', title: 'Duplicated IDL names' },
+ { name: 'redefinedIncludes', title: 'Duplicated `includes` statements' },
+ { name: 'redefinedMember', title: 'Duplicated members' },
+ { name: 'redefinedWithDifferentTypes', title: 'Duplicated IDL names with different types' },
+ { name: 'singleEnumValue', title: 'Enums with a single value' },
+ { name: 'unexpectedEventHandler', title: 'Missing `EventTarget` inheritances' },
+ { name: 'unknownExposure', title: 'Unknown globals in `[Exposed]` attribute' },
+ { name: 'unknownExtAttr', title: 'Unknown extended attributes' },
+ { name: 'unknownType', title: 'Unknown Web IDL type' },
+ { name: 'wrongCaseEnumValue', title: 'Enums with wrong casing' },
+ { name: 'wrongKind', title: 'Invalid inheritance chains' },
+ { name: 'wrongType', title: 'Web IDL names incorrectly used as types' }
+ ],
+ study: studyWebIdl,
+ studyParams: ['curated']
+ }
+];
+
+
+/**
+ * Possible report structures.
+ *
+ * "/" separates levels in the hierarchy.
+ * "+" combines creates a composed key at a given level.
+ *
+ * For example, "group+spec/type" means: first level per
+ * anomaly group and spec (so one "web-animations-2-webidl" entry if the
+ * spec "web-animations-2" has "webidl" issues), second level per type.
+ *
+ * The list is described in more detail in the CLI help. Run:
+ * npx strudy inspect --help
+ * ... or check the code in `strudy.js` at the root of the project.
+ */
+const reportStructures = [
+ 'flat',
+ 'type+spec',
+ 'group+spec',
+ 'group+spec/type',
+ 'spec/type',
+ 'spec/group/type',
+ 'type/spec',
+ 'group/type/spec',
+ 'group/spec/type'
+];
+
+
+// Compute mapping between an anomaly type and its parent group
+const anomalyToGroup = {};
+for (const group of anomalyGroups) {
+ for (const type of group.types) {
+ anomalyToGroup[type.name] = group;
+ }
+}
+
+/**
+ * Return an object that describes the requested anomaly type
+ */
+function getAnomalyType(name) {
+ for (const group of anomalyGroups) {
+ const type = group.types.find(t => t.name === name);
+ if (type) {
+ return Object.assign({}, type);
+ }
+ }
+ return null;
+}
+
+/**
+ * Return an object that describes the requested anomaly group
+ */
+function getAnomalyGroup(name) {
+ for (const group of anomalyGroups) {
+ if (group.name === name) {
+ return {
+ name: group.name,
+ title: group.title
+ };
+ }
+ }
+ return null;
+}
+
+/**
+ * Return an object that describes the requested anomaly group
+ * from the given anomaly type
+ */
+function getAnomalyGroupFromType(type) {
+ const name = anomalyToGroup[type];
+ return getAnomalyGroup(name);
+}
+
+
+/**
+ * Structure a flat list of anomalies to the requested structure
+ */
+function structureResults(structure, anomalies, crawlResults) {
+ const levels = structure.split('/')
+ .map(level => level.replace(/\s+/g, ''));
+ const report = [];
+
+ switch (levels[0]) {
+ case 'flat':
+ for (const anomaly of anomalies) {
+ report.push(anomaly);
+ }
+ break;
+
+ case 'type+spec':
+ for (const anomaly of anomalies) {
+ const type = getAnomalyType(anomaly.name);
+ for (const spec of anomaly.specs) {
+ let entry = report.find(entry =>
+ entry.type.name === anomaly.name &&
+ entry.spec.shortname === spec.shortname);
+ if (!entry) {
+ const titlePrefix = isInMultiSpecRepository(spec, crawlResults) ?
+ `[${spec.shortname}] ` : '';
+ entry = {
+ name: `${spec.shortname}-${type.name.toLowerCase()}`,
+ title: `${titlePrefix}${type.title} in ${spec.title}`,
+ type, spec, anomalies: []
+ };
+ report.push(entry);
+ }
+ entry.anomalies.push(anomaly);
+ }
+ }
+ break;
+
+ case 'group+spec':
+ for (const anomaly of anomalies) {
+ const group = anomalyToGroup[anomaly.name];
+ for (const spec of anomaly.specs) {
+ let entry = report.find(entry =>
+ entry.group.name === group.name &&
+ entry.spec.shortname === spec.shortname);
+ if (!entry) {
+ const titlePrefix = isInMultiSpecRepository(spec, crawlResults) ?
+ `[${spec.shortname}] ` : '';
+ entry = {
+ name: `${spec.shortname}-${group.name.toLowerCase()}`,
+ title: `${titlePrefix}${group.title} in ${spec.title}`,
+ group, spec, anomalies: []
+ };
+ report.push(entry);
+ }
+ entry.anomalies.push(anomaly);
+ }
+ }
+ break;
+
+ case 'spec':
+ for (const anomaly of anomalies) {
+ for (const spec of anomaly.specs) {
+ let entry = report.find(entry =>
+ entry.spec.shortname === spec.shortname);
+ if (!entry) {
+ entry = {
+ name: spec.shortname,
+ title: spec.title,
+ spec, anomalies: []
+ };
+ report.push(entry);
+ }
+ entry.anomalies.push(anomaly);
+ }
+ }
+ break;
+
+ case 'type':
+ for (const anomaly of anomalies) {
+ const type = getAnomalyType(anomaly.name);
+ let entry = report.find(entry => entry.type.name === anomaly.name);
+ if (!entry) {
+ entry = {
+ name: type.name.toLowerCase(),
+ title: type.title,
+ type, anomalies: []
+ };
+ report.push(entry);
+ }
+ entry.anomalies.push(anomaly);
+ }
+ break;
+
+ case 'group':
+ for (const anomaly of anomalies) {
+ const group = anomalyToGroup[anomaly.name];
+ let entry = report.find(entry => entry.group.name === group.name);
+ if (!entry) {
+ entry = {
+ name: group.name.toLowerCase(),
+ title: group.title,
+ group, anomalies: []
+ };
+ report.push(entry);
+ }
+ entry.anomalies.push(anomaly);
+ }
+ break;
+ }
+
+ if (levels.length > 1) {
+ const itemsStructure = levels.slice(1).join('/');
+ for (const entry of report) {
+ entry.items = structureResults(itemsStructure, entry.anomalies, crawlResults);
+ delete entry.anomalies;
+ }
+ }
+ return report;
+}
+
+
+function makeLowerCase(description) {
+ return description.charAt(0).toLowerCase() + description.slice(1);
+}
+
+function pad(str, depth) {
+ while (depth > 1) {
+ str = ' ' + str;
+ depth -= 1;
+ }
+ return str;
+}
+
+function serializeEntry(entry, format, depth = 0) {
+ let res;
+ if (format === 'json') {
+ res = Object.assign({}, entry);
+ if (entry.spec) {
+ res.spec = {
+ url: entry.spec.url,
+ shortname: entry.spec.shortname,
+ title: entry.spec.title
+ };
+ }
+ if (entry.specs) {
+ res.specs = entry.specs.map(spec => ({
+ url: spec.url,
+ shortname: spec.shortname,
+ title: spec.title
+ }));
+ }
+ if (entry.items) {
+ res.items = entry.items.map(item => serializeEntry(item, format, depth + 1));
+ }
+ if (entry.anomalies) {
+ res.anomalies = entry.anomalies.map(anomaly => serializeEntry(anomaly, format, depth + 1));
+ }
+ }
+ else if (format === 'markdown') {
+ res = '';
+ if (entry.spec && entry.group) {
+ res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.group.description ?? entry.group.title)}:`;
+ }
+ else if (entry.spec && entry.type) {
+ res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), ${makeLowerCase(entry.type.description ?? entry.type.title)}:`;
+ }
+ else if (entry.group) {
+ if (depth === 0) {
+ res = (entry.group.description ?? entry.group.title) + ':';
+ }
+ else {
+ res = pad(`* ${entry.group.title}`, depth);
+ }
+ }
+ else if (entry.type) {
+ if (depth === 0) {
+ res = (entry.type.description ?? entry.type.title) + ':';
+ }
+ else {
+ res = pad(`* ${entry.type.title}`, depth);
+ }
+ }
+ else if (entry.spec) {
+ if (depth === 0) {
+ res = `While crawling [${entry.spec.title}](${entry.spec.crawled}), the following anomalies were identified:`;
+ }
+ else {
+ res = pad(`* [${entry.spec.title}](${entry.spec.crawled})`, depth);
+ }
+ }
+ else if (entry.message) {
+ res = pad(`* [ ] ${entry.message}`, depth);
+ }
+
+ for (const item of entry.items ?? []) {
+ res += '\n' + serializeEntry(item, format, depth + 1);
+ }
+ for (const anomaly of entry.anomalies ?? []) {
+ res += `\n` + serializeEntry(anomaly, format, depth + 1);
+ }
+ }
+ return res;
+}
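+
+// As an illustration (hypothetical spec and anchor), a "type+spec" entry for
+// broken links may serialize to markdown along these lines:
+//   While crawling [Example API](https://example.org/example-api/), the
+//   following links to other specifications were detected as pointing to
+//   non-existing anchors:
+//   * [ ] https://example.org/other-spec/#gone-anchor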
+
+
+/**
+ * Format the structured report as JSON or markdown, or a combination of both
+ */
+function formatReport(format, report) {
+ if (format === 'json') {
+ // We'll return the report as is, trimming the information about specs to
+ // a reasonable minimum (the rest of the information can easily be
+ // retrieved from the crawl result if needed)
+ return report.map(entry => serializeEntry(entry, 'json'));
+ }
+ else if (format === 'issue') {
+ return report.map(entry => ({
+ name: entry.name,
+ title: entry.title,
+ spec: entry.spec,
+ content: serializeEntry(entry, 'markdown')
+ }));
+ }
+ else if (format === 'full') {
+ return [
+ {
+ title: 'Study report',
+ content: report.map(entry =>
+`## ${entry.title}
+${serializeEntry(entry, 'markdown')}`)
+ }
+ ]
+ }
+}
+
+
+/**
+ * The report includes a set of anomalies. It can also be useful to know
+ * what things looked fine, in other words, what other anomalies could have
+ * been reported in theory. This can typically be used to identify issue files
+ * created in the past that now need to be deleted.
+ *
+ * Note: Some anomalies may hide others. For example, a WebIDL update can make
+ * the Web IDL invalid... and hide other WebIDL issues that may still exist in
+ * the spec. This function may return false negatives as a result.
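+ *
+ * For example, with the "type+spec" structure, returned names follow the
+ * "shortname-type" pattern, say "example-spec-brokenlinks" for a hypothetical
+ * shortname, matching the names of issue files on disk.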
+ */
+function getNamesOfNonReportedEntries(report, specs, what, structure) {
+ const groups = [];
+ const types = [];
+ for (const group of anomalyGroups) {
+ if (what.includes('all') ||
+ what.includes(group.name) ||
+ group.types.find(type => what.includes(type.name))) {
+ groups.push(group);
+ for (const type of group.types) {
+ if (what.includes('all') ||
+ what.includes(group.name) ||
+ what.includes(type.name)) {
+ types.push(type);
+ }
+ }
+ }
+ }
+
+ const levels = structure.split('/')
+ .map(level => level.replace(/\s+/g, ''));
+ let allNames = [];
+ switch (levels[0]) {
+ case 'flat':
+ // Not much we can say there
+ break;
+ case 'type+spec':
+ allNames = specs
+ .map(spec => types.map(type => `${spec.shortname}-${type.name.toLowerCase()}`))
+ .flat();
+ break;
+ case 'group+spec':
+ allNames = specs
+ .map(spec => groups.map(group => `${spec.shortname}-${group.name.toLowerCase()}`))
+ .flat();
+ break;
+ case 'spec':
+ allNames = specs.map(spec => spec.shortname);
+ break;
+ case 'type':
+ allNames = types.map(type => type.name);
+ break;
+ case 'group':
+ allNames = groups.map(group => group.name);
+ break;
+ }
+ return allNames.filter(name => !report.find(entry => entry.name === name));
+}
+
+
+/**
+ * Main function that studies a crawl result and returns a structured
+ * report.
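+ *
+ * A minimal usage sketch (hypothetical shortname):
+ *   const report = await study(crawlResults, {
+ *     what: ['brokenLinks'],
+ *     structure: 'type+spec',
+ *     format: 'issue',
+ *     specs: ['example-spec']
+ *   });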
+ */
+export default async function study(specs, options = {}) {
+ // Copy the options object (we're going to add options on our own
+ // before calling other study methods)
+ options = Object.assign({}, options);
+
+ const what = options.what ?? ['all'];
+ const structure = options.structure ?? 'type + spec';
+ const format = options.format ?? 'issue';
+
+ if (!what.includes('all')) {
+ const validWhat = what.every(name =>
+ anomalyGroups.find(g => g.name === name || g.types.find(t => t.name === name)));
+ if (!validWhat) {
+ throw new Error('Invalid `what` option');
+ }
+ }
+ if (!reportStructures.find(s => structure.replace(/\s+/g, '') === s)) {
+ throw new Error('Invalid `structure` option');
+ }
+
+ // Only keep the specs that the caller wants to study
+ // (but note study functions that analyze references need the whole list!)
+ options.crawlResults = specs;
+ if (options.specs) {
+ specs = options.crawlResults.filter(spec => options.specs.find(shortname => shortname === spec.shortname));
+ }
+
+ // Anomalies are studied in groups of related anomalies, let's compute the
+ // studies that we need to run to answer the request
+ const groups = anomalyGroups.filter(group =>
+ what.includes('all') ||
+ what.includes(group.name) ||
+ group.types.find(type => what.includes(type.name)));
+
+ // Run studies and fill the anomaly report accordingly
+ let anomalies = [];
+ for (const group of groups) {
+ const studyResult = await group.study(specs, options);
+ const recordAnomaly = recordCategorizedAnomaly(
+ anomalies, group.name, group.types.map(t => t.name));
+ studyResult.forEach(an => recordAnomaly(an.spec ?? an.specs, an.name, an.message));
+ }
+
+ // Only keep anomalies whose types we're interested in
+ anomalies = anomalies.filter(anomaly =>
+ what.includes('all') ||
+ what.includes(anomaly.name) ||
+ what.includes(anomalyToGroup[anomaly.name].name));
+
+ // Now that we have a flat report of anomalies,
+ // let's structure and serialize it as requested
+ const report = structureResults(structure, anomalies, options.crawlResults);
+
+ // And serialize it using the right format
+ const result = {
+ type: 'study',
+ date: (new Date()).toJSON(),
+ structure,
+ what,
+ stats: {
+ crawled: options.crawlResults.length,
+ studied: specs.length,
+ anomalies: anomalies.length
+ },
+ results: formatReport(format, report),
+ looksGood: getNamesOfNonReportedEntries(report, specs, what, structure)
+ };
+
+ // Return the structured report
+ return result;
+}
\ No newline at end of file
diff --git a/src/reporting/file-issue-for-review.js b/src/reporting/file-issue-for-review.js
index 0d797fbe..f5cea3f8 100644
--- a/src/reporting/file-issue-for-review.js
+++ b/src/reporting/file-issue-for-review.js
@@ -1,280 +1,176 @@
-/* Takes a report of anomalies produced by Strudy,
- creates a draft of an issue per spec and per anomaly type
- and submits as a pull request in this repo if no existing one matches
+/**
+ * Looks at draft issue files produced by the Strudy CLI in the issues folder
+ * and submits new/updated/deleted ones as pull requests in this repo if there
+ * is no pending pull request already.
*/
-import { loadCrawlResults } from '../lib/util.js';
-import studyBackrefs from '../lib/study-backrefs.js';
-import studyReferences from '../lib/study-refs.js';
-import isInMultiSpecRepository from '../lib/is-in-multi-spec-repo.js';
-import loadJSON from '../lib/load-json.js';
import path from 'node:path';
import fs from 'node:fs/promises';
+import { fileURLToPath } from "node:url";
import { execSync } from 'node:child_process';
-import Octokit from '../lib/octokit.js';
import matter from 'gray-matter';
+import { Command, InvalidArgumentError } from 'commander';
-const config = await loadJSON("config.json");
-const GH_TOKEN = config?.GH_TOKEN ?? process.env.GH_TOKEN;
+/**
+ * Command-line execution parameters for calls to `execSync`
+ */
+const scriptPath = path.dirname(fileURLToPath(import.meta.url));
+const execParams = {
+ cwd: path.join(scriptPath, '..', '..'),
+ encoding: 'utf8'
+};
-const MAX_PR_BY_RUN = 10;
-const repoOwner = 'w3c';
-const repoName = 'strudy';
+/**
+ * Wrap "matter" issue report to create a suitable PR body
+ */
+function prWrapper(action, issueReport) {
+ if (action === 'add') {
+ return `This pull request was automatically created by Strudy upon detecting errors in ${issueReport.data.Title}.
-const octokit = new Octokit({
- auth: GH_TOKEN
- // log: console
-});
-
-function issueWrapper (spec, anomalies, anomalyType, crawl) {
- const titlePrefix = isInMultiSpecRepository(spec, crawl.ed) ? `[${spec.shortname}] ` : '';
- let anomalyReport = ''; let title = '';
- switch (anomalyType) {
- case 'brokenLinks':
- title = `Broken references in ${spec.title}`;
- anomalyReport = 'the following links to other specifications were detected as pointing to non-existing anchors';
- break;
- case 'outdatedSpecs':
- title = `Outdated references in ${spec.title}`;
- anomalyReport = 'the following links were detected as pointing to outdated specifications';
- break;
- case 'nonCanonicalRefs':
- title = `Non-canonical references in ${spec.title}`;
- anomalyReport = 'the following links were detected as pointing to outdated URLs';
- break;
- case 'discontinuedReferences':
- title = `Normative references to discontinued specs in ${spec.title}`;
- anomalyReport = 'the following normative referenced were detected as pointing to discontinued specifications';
- break;
- }
- return {
- title: titlePrefix + title,
- content: `
-While crawling [${spec.title}](${spec.crawled}), ${anomalyReport}:
-${anomalies.map(anomaly => `* [ ] ${anomaly.message}`).join('\n')}
-
-This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).`
- };
-}
-
-function prWrapper (title, uri, repo, issueReport) {
- return `This pull request was automatically created by Strudy upon detecting errors in ${title}.
-
-Please check that these errors were correctly detected, and that they have not already been reported in ${repo}.
+Please check that these errors were correctly detected, and that they have not already been reported in ${issueReport.data.Repo}.
If everything is OK, you can merge this pull request which will report the issue below to the repo, and update the underlying report file with a link to the said issue.
-${issueReport}
+${issueReport.stringify()}
`;
-}
-
-
-const knownAnomalyTypes = ['brokenLinks', 'outdatedSpecs', 'nonCanonicalRefs', 'discontinuedReferences'];
+ }
+ else {
+ return `This pull request was automatically created by Strudy while analyzing ${issueReport.data.Title}.
-let edCrawlResultsPath = process.argv[2];
-let trCrawlResultsPath = process.argv[3];
-const anomalyFilter = process.argv.slice(4).filter(p => !p.startsWith('--'));
-const unknownAnomalyType = anomalyFilter.find(p => !knownAnomalyTypes.includes(p));
-if (unknownAnomalyType) {
- console.error(`Unknown report type ${unknownAnomalyType} - known types are ${knownAnomalyTypes.join(', ')}`);
- process.exit(1);
-}
-const anomalyTypes = anomalyFilter.length ? anomalyFilter : knownAnomalyTypes;
-const updateMode = process.argv.includes('--update') ? 'update-untracked' : (process.argv.includes('--update-tracked') ? 'update-tracked' : false);
-const dryRun = process.argv.includes('--dry-run');
-const noGit = dryRun || updateMode || process.argv.includes('--no-git');
+Please check that past errors listed below have indeed been corrected, and that the related issue in ${issueReport.data.Repo} has been closed accordingly.
-if (!noGit && !GH_TOKEN) {
- console.error('GH_TOKEN must be set to some personal access token as an env variable or in a config.json file');
- process.exit(1);
-}
+If everything looks OK, you can merge this pull request to delete the issue file.
-// Target the index file if needed
-if (!edCrawlResultsPath.endsWith('index.json')) {
- edCrawlResultsPath = path.join(edCrawlResultsPath, 'index.json');
-}
-if (!trCrawlResultsPath.endsWith('index.json')) {
- trCrawlResultsPath = path.join(trCrawlResultsPath, 'index.json');
+${issueReport.stringify()}
+`;
+ }
}
-let existingReports = [];
-if (updateMode) {
- console.log('Compiling list of relevant existing issue reports…');
- // List all existing reports to serve as a comparison point
- // to detect if any report can be deleted
- // if the anomalies are no longer reported
- const reportFiles = (await fs.readdir('issues')).map(p => 'issues/' + p);
- for (const anomalyType of anomalyTypes) {
- existingReports = existingReports.concat(reportFiles.filter(p => p.endsWith(`-${anomalyType.toLowerCase()}.md`)));
+/**
+ * Parse the maximum number of pull requests option as integer
+ */
+function myParseInt(value) {
+ const parsedValue = parseInt(value, 10);
+ if (isNaN(parsedValue)) {
+ throw new InvalidArgumentError('Not a number.');
}
- console.log('- done');
+ return parsedValue;
}
-const nolongerRelevantReports = new Set(existingReports);
-// Donwload automatic map of multipages anchors in HTML spec
-let htmlFragments = {};
-try {
- console.log('Downloading HTML spec fragments data…');
- htmlFragments = await fetch('https://html.spec.whatwg.org/multipage/fragment-links.json').then(r => r.json());
- console.log('- done');
-} catch (err) {
- console.log('- failed: could not fetch HTML fragments data, may report false positive broken links on HTML spec');
-}
+const program = new Command();
+program
+ .description('File added/updated/deleted issue files as individual GitHub pull requests')
+ .option('--dry-run', 'run the script without creating any actual pull request')
+ .option('-m, --max <number>', 'maximum number of pull requests to create/update', myParseInt, 10)
+ .showHelpAfterError('(run with --help for usage information)')
+ .addHelpText('after', `
+Minimal usage example:
+ To create up to 10 pull requests from local issue files, run:
+ $ node file-issue-for-review.js
+
+Description:
+ The command looks into the \`issues\` folder to find files that have been
+ added, updated or deleted, and that have not yet been committed to the
+ repository. For each of them, it creates a pull request on GitHub, unless one
+ already exists.
+
+ The \`gh\` and \`git\` CLI commands must be available and functional. The
+ command will push Git updates to the \`origin\` remote, which must exist.
+
+Usage notes for some of the options:
+--dry-run
+ Run the script without committing anything, and without creating any actual
+ pull request. The option is meant for debugging.
+
+-m, --max
+ Maximum number of pull requests to create. Defaults to 10.
+
+ You may set the option to 0 to create as many pull requests as needed. You
+ may want to check that there aren't too many pull requests to create first,
+ though!
+`)
+ .action(async (options) => {
+ function execOrLog(cmd) {
+ options.dryRun ? console.log(cmd) : execSync(cmd, execParams);
+ }
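+ // For example, execOrLog('git push origin example-branch') prints the
+ // command in dry-run mode and actually runs it otherwise (illustrative
+ // branch name).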
-console.log(`Opening crawl results ${edCrawlResultsPath} and ${trCrawlResultsPath}…`);
-const crawl = await loadCrawlResults(edCrawlResultsPath, trCrawlResultsPath);
-console.log('- done');
-console.log('Running references analysis…');
-// TODO: if we're not running all the reports, this could run only the
-// relevant study function
-const results = studyBackrefs(crawl.ed, crawl.tr, htmlFragments).concat(studyReferences(crawl.ed));
-console.log('- done');
-const currentBranch = noGit || execSync('git branch --show-current', { encoding: 'utf8' }).trim();
-const needsPush = {};
-for (const anomalyType of anomalyTypes) {
- const anomalies = results.filter(r => r.name === anomalyType);
- const specs = [...new Set(anomalies.map(a => a.specs.map(s => s.url)).flat())];
- for (const url of specs) {
- const specAnomalies = anomalies.filter(a => a.specs[0].url === url);
- const spec = specAnomalies[0].specs[0];
- console.log(`Compiling ${anomalyType} report for ${spec.title}…`);
- // if we don't know the repo, we can't file an issue
- if (!spec.nightly?.repository) {
- console.log(`No known repo for ${spec.title}, skipping`);
- continue;
+ if (options.dryRun) {
+ console.log('DRY RUN!');
+ console.log('The command won\'t make any actual change.');
}
- if (spec.standing === "discontinued") {
- console.log(`${spec.title} is discontinued, skipping`);
- continue;
+ console.log('How many pull requests can we use to change the world?');
+ console.log(`- nb pull requests that we may create: ${options.max}`);
+
+ console.log('On which Git branch are we?');
+ const currentBranch = execSync('git branch --show-current', execParams).trim();
+ console.log(`- current branch: ${currentBranch}`);
+
+ // Possibly useful reminder about calls to `filter` below:
+ // `split` on an empty string does not return an empty array!
+ console.log('How many issue files ought to be reported?');
+ const toadd = execSync('git diff --name-only --diff-filter=d issues/*.md', execParams)
+ .trim().split('\n').filter(x => !!x);
+ console.log(`- nb issue files to add/update: ${toadd.length}`);
+ const todelete = execSync('git diff --name-only --diff-filter=D issues/*.md', execParams)
+ .trim().split('\n').filter(x => !!x);
+ console.log(`- nb issue files to delete: ${todelete.length}`);
+ const toreport = toadd.map(name => ({ action: 'add', filename: name }))
+ .concat(todelete.map(name => ({ action: 'delete', filename: name })))
+ .sort((e1, e2) => e1.filename.localeCompare(e2.filename));
+
+ if (toreport.length === 0) {
+ console.log('No issue files to report');
}
- const issueMoniker = `${spec.shortname}-${anomalyType.toLowerCase()}`;
- // is there already a file with that moniker?
- const issueFilename = path.join('issues/', issueMoniker + '.md');
- let tracked = 'N/A';
- let existingReportContent;
+
+ let reported = 0;
try {
- if (!(await fs.stat(issueFilename)).isFile()) {
- console.error(`${issueFilename} already exists but is not a file`);
- continue;
- } else {
- if (!updateMode) {
- console.log(`${issueFilename} already exists, bailing`);
+ console.log('Create pull requests as needed...');
+ for (const entry of toreport) {
+ // Look for a related PR that may still be pending
+ const issueMoniker = entry.filename.match(/^issues\/(.*)\.md$/)[1];
+ const pendingPRStr = execSync(`gh pr list --head ${issueMoniker} --json number,headRefName`, execParams);
+ const pendingPR = JSON.parse(pendingPRStr)[0];
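+ // When a pending PR exists, pendingPR looks like
+ // { number: 42, headRefName: 'example-branch' } (illustrative values).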
+ if (pendingPR) {
+ console.log(`- skip ${entry.filename}, a pending PR already exists (#${pendingPR.number})`);
continue;
- } else {
- nolongerRelevantReports.delete(issueFilename);
- try {
- const existingReport = matter(await fs.readFile(issueFilename, 'utf-8'));
- tracked = existingReport.data.Tracked;
- existingReportContent = existingReport.content;
- // only update tracked or untracked reports based on
- // CLI parameter
- if ((updateMode === 'update-untracked' && tracked !== 'N/A') || (updateMode === 'update-tracked' && tracked === 'N/A')) {
- continue;
- }
- } catch (e) {
- console.error('Failed to parse existing content', e);
- continue;
- }
}
- }
- } catch (err) {
- // Intentionally blank
- }
- // if not, we create the file, add it in a branch
- // and submit it as a pull request to the repo
- const { title, content: issueReportContent } = issueWrapper(spec, specAnomalies, anomalyType, crawl);
- if (updateMode) {
- if (existingReportContent) {
- const existingAnomalies = existingReportContent.split('\n').filter(l => l.startsWith('* [ ] ')).map(l => l.slice(6));
- if (existingAnomalies.every((a, i) => specAnomalies[i] === a) && existingAnomalies.length === specAnomalies.length) {
- // no substantial change, skip
- console.log(`Skipping ${title}, no change`);
- continue;
+
+ let issueReport;
+ if (entry.action === 'add') {
+ issueReport = matter(await fs.readFile(entry.filename, 'utf-8'));
}
- } else {
- // in update mode, we only care about existing reports
- continue;
- }
- }
- const issueReportData = matter(issueReportContent);
- issueReportData.data = {
- Repo: spec.nightly.repository,
- Tracked: tracked,
- Title: title
- };
- let issueReport;
- try {
- issueReport = issueReportData.stringify();
- } catch (err) {
- console.error(`Failed to stringify report of ${anomalyType} for ${title}: ${err}`, issueReportContent);
- continue;
- }
- if (dryRun) {
- console.log(`Would add ${issueFilename} with`);
- console.log(issueReport);
- console.log();
- } else {
- await fs.writeFile(issueFilename, issueReport, 'utf-8');
- try {
- if (!noGit) {
- console.log(`Committing issue report as ${issueFilename} in branch ${issueMoniker}…`);
- execSync(`git checkout -b ${issueMoniker}`);
- execSync(`git add ${issueFilename}`);
- execSync(`git commit -m "File report on ${issueReportData.data.Title}"`);
- needsPush[issueMoniker] = { title: issueReportData.data.Title, report: issueReport, repo: spec.nightly.repository, specTitle: spec.title, uri: spec.crawled };
- console.log('- done');
- execSync(`git checkout ${currentBranch}`);
+ else {
+ // File was deleted, retrieve its previous content from the HEAD
+ issueReport = matter(await execSync(`git show HEAD:${entry.filename}`, execParams));
+ }
+
+ console.log(`- create PR for ${entry.filename}`);
+ execOrLog(`git checkout -b ${issueMoniker}`);
+ execOrLog(`git add ${entry.filename}`);
+ execOrLog(`git commit -m "${entry.action === 'add' ? 'File' : 'Delete'} report on ${issueReport.data.Title}"`);
+ execOrLog(`git push origin ${issueMoniker}`);
+
+ const prBodyFile = path.join(execParams.cwd, '__pr.md');
+ const prBody = prWrapper(entry.action, issueReport);
+ await fs.writeFile(prBodyFile, prBody, 'utf8');
+ try {
+ execOrLog(`gh pr create --body-file __pr.md --title "${entry.action === 'add' ? 'File' : 'Delete'} report on ${issueReport.data.Title.replace(/"/g, '')}"`);
+ }
+ finally {
+ await fs.rm(prBodyFile, { force: true });
+ }
+
+ reported += 1;
+ if (options.max > 0 && reported >= options.max) {
+ break;
}
- } catch (err) {
- console.error(`Failed to commit error report for ${spec.title}`, err);
- await fs.unlink(issueFilename);
- execSync(`git checkout ${currentBranch}`);
}
}
- }
-}
-if (nolongerRelevantReports.size) {
- console.log('The following reports are no longer relevant, deleting them', [...nolongerRelevantReports]);
- for (const issueFilename of nolongerRelevantReports) {
- await fs.unlink(issueFilename);
- }
-}
-if (Object.keys(needsPush).length) {
- let counter = 0;
- for (const branch in needsPush) {
- if (counter > MAX_PR_BY_RUN) {
- delete needsPush[branch];
- continue;
+ finally {
+ console.log(`- get back to the initial Git branch ${currentBranch}`);
+ execOrLog(`git checkout ${currentBranch}`);
+ console.log(`- nb PR ${options.dryRun ? 'that would be ' : ''}created: ${reported}`);
}
+ });
- // is there already a pull request targetting that branch?
- const { data: pullrequests } = (await octokit.rest.pulls.list({
- owner: repoOwner,
- repo: repoName,
- head: `${repoOwner}:${branch}`
- }));
- if (pullrequests.length > 0) {
- console.log(`A pull request from branch ${branch} already exists, bailing`);
- delete needsPush[branch];
- }
- counter++;
- }
-}
-if (Object.keys(needsPush).length) {
- console.log(`Pushing new branches ${Object.keys(needsPush).join(' ')}…`);
- execSync(`git push origin ${Object.keys(needsPush).join(' ')}`);
- console.log('- done');
- for (const branch in needsPush) {
- const { title, specTitle, uri, repo, report } = needsPush[branch];
- console.log(`Creating pull request from branch ${branch}…`);
- await octokit.rest.pulls.create({
- owner: repoOwner,
- repo: repoName,
- title,
- body: prWrapper(specTitle, uri, repo, report),
- head: `${repoOwner}:${branch}`,
- base: 'main'
- });
- console.log('- done');
- }
-}
+program.parseAsync(process.argv);
\ No newline at end of file
diff --git a/strudy.js b/strudy.js
index a766d8fb..bbd4bdd2 100644
--- a/strudy.js
+++ b/strudy.js
@@ -7,27 +7,25 @@
* Provided Strudy was installed as a global package, the spec analyzer can be
* called directly through:
*
- * `strudy [options] [report]`
- *
- * Use the `--help` option for usage instructions.
+ * `strudy --help`
*
* If Strudy was not installed as a global package, call:
*
- * `node strudy.js [options] [report]`
+ * `node strudy.js --help`
*
* @module crawler
*/
-import { Command } from 'commander';
+import { Command, InvalidArgumentError } from 'commander';
import { constants as fsConstants } from 'node:fs';
import fs from 'node:fs/promises';
-import pandoc from 'node-pandoc';
import path from 'node:path';
import satisfies from 'semver/functions/satisfies.js';
import packageContents from './package.json' with { type: 'json' };
-import studyCrawl from './src/lib/study-crawl.js';
-import generateReport from './src/lib/generate-report.js';
+import study from './src/lib/study.js';
import loadJSON from './src/lib/load-json.js';
+import { expandCrawlResult } from 'reffy';
+import matter from 'gray-matter';
// Warn if version of Node.js does not satisfy requirements
const { version, engines } = packageContents;
@@ -48,55 +46,48 @@ async function exists(file) {
}
}
-
-async function isStudyReport(file) {
- const fd = await fs.open(file, 'r');
- try {
- const buff = Buffer.alloc(1024);
- await fd.read(buff, 0, 1024);
- const str = buff.toString();
- if (str.match(/"type"\s*:\s*"study"/)) {
- return true;
- }
- }
- catch {
- return false;
- }
- finally {
- await fd.close();
+function myParseInt(value) {
+ const parsedValue = parseInt(value, 10);
+ if (isNaN(parsedValue)) {
+ throw new InvalidArgumentError('Not a number.');
}
+ return parsedValue;
}
-
const program = new Command();
program
.name('strudy')
- .description('Analyzes a crawl report generated by Reffy')
- .version(version)
- .usage('[options] ')
- .argument('', 'Path/URL to crawl report or study file')
- .option('-f, --format ', 'create a markdown/HTML report from study file')
- .option('-d, --diff ', 'create a diff from some reference study')
+ .description('Analyzes a crawl report generated by Reffy to detect anomalies in specifications')
+ .version(version);
+
+program
+ .command('inspect')
+ .alias('study')
+ .argument('<report>', 'Path/URL to crawl report')
+ .option('-f, --format <format>', 'report markdown or json', 'markdown')
+ .option('-i, --issues <folder>', 'report issues as markdown files in the given folder')
+ .option('-m, --max <number>', 'maximum number of issue files to create/update', myParseInt, 0)
 .option('-s, --spec <specs...>', 'restrict analysis to given specs')
- .option('--dep', 'create a dependencies report')
- .option('--onlynew', 'only include new diff in the diff report')
- .option('--perissue', 'create a markdown/HTML report per issue')
- .option('--tr ', 'Path/URL to crawl report on published specs')
+ .option('--structure <structure>', 'report structure', 'type+spec')
+ .option('--tr <crawl>', 'path/URL to crawl report on published specs')
+ .option('--update-mode <mode>', 'what issue files to update', 'new')
+ .option('-w, --what <anomalies...>', 'what to analyze', ['all'])
.showHelpAfterError('(run with --help for usage information)')
.addHelpText('after', `
Minimal usage example:
To study a crawl report in current folder:
- $ strudy .
+ $ strudy inspect .
Description:
Analyzes a crawl report generated by Reffy and create a report with potential
anomalies in each of the specs contained in the crawl report.
- The report is written to the console as a serialized JSON object or as a
- markdown or HTML report depending on command options.
+ Depending on command options, the report is either written to the console as
+ a serialized JSON object or as a markdown report (see the --format option),
+ or written to individual issue files in a folder (see the --issues option).
Argument:
-
+ <report>
Path to the crawl report to analyze. If the path leads to a folder, Strudy
will look for an "ed/index.json" file under that folder first (if it exists,
it will also look for a possible "tr/index.json" file to set the --tr option),
@@ -105,51 +96,65 @@ Argument:
Usage notes for some of the options:
-f, --format
Tell Strudy to return a report in the specified format. Format may be one of
- "json" (default when option is not set), "markdown" or "html".
+ "markdown" (default when option is not set) or "json".
+
+ The --format option cannot be set to "json" if the --issues option is set.
+
+-i, --issues
+ Tell Strudy to report the anomalies in anomaly files in the given folder.
+ An anomaly file gets created for, and named after, each key at the first
+ level of the report (see the --structure option).
- When the option is specified to either "markdown" or "html", the report
- pointed to by may be a JSON file that contains a Strudy report.
+ Anomaly files are in markdown. The --format option must be set to "markdown",
+ or not set at all.
--d, --diff
- Tell Strudy tool to return a diff from the provided reference Strudy report.
- must point to a Strudy report.
+ Anomaly files start with metadata, used to convert the file to a GitHub issue
+ and track the resolution of the issue afterwards: "Repo" sets the repository
+ for the issue, "Title" the title of the issue, and "Tracked" the URL of the
+ issue, once created.
- When the option is specified, the report pointed to by may be a JSON
- file that contains a Strudy report.
+ Existing anomaly files in the folder are preserved by default; set the
+ --update-mode option to change that behavior.
- Diff reports are in markdown and the "--format" option, if specified, must be
- "markdown".
+-m, --max
+ Maximum number of issue files to add or update. Defaults to 0, which means
+ "no limit".
- The --diff option and the --dep option cannot both be set.
+ This setting should only be useful when combined with --issues to create
+ issue files in batches. It may also be set in the absence of --issues, in
+ which case it restricts the number of entries at the first level of the
+ report (see --structure).
-s, --spec
- Valid spec values may be a shortname, a URL, or a relative path to JSON file
- that contains a list of spec URLs and/or shortnames. Shortnames may be the
- shortname of the spec series.
+ Valid spec values may be a shortname, a URL, or a relative path to a JSON
+ file that contains a list of spec URLs and/or shortnames. Shortnames may be
+ the shortname of the spec series.
Use "all" to include all specs. This is equivalent to not setting the option
at all.
For instance:
- $ strudy . --spec picture-in-picture https://w3c.github.io/mediasession/
-
---dep
- Tell Strudy to return a dependencies report.
-
- When the option is specified, the report pointed to by may be a JSON
- file that contains a Strudy report.
-
- Dependencies reports are in markdown and the "--format" option, if specified,
- must be "markdown".
-
- The --diff option and the --dep option cannot both be set.
-
---perissue
- Markdown/HTML reports are per spec by default. Set this option to tell Strudy
- to generate markdown/HTML reports per issue instead.
-
- The --diff option must not be set.
- The --format option must be set to either "markdown" or "html".
+ $ strudy inspect . --spec picture-in-picture https://w3c.github.io/mediasession/
+
+--structure
+ Describes the hierarchy in the report(s) that Strudy returns. Possible values:
+ "flat" no level, report anomalies one by one
+ "type+spec" one level with one entry per type and spec (default)
+ "group+spec/type" first level per group and spec, second level per type
+ "spec/type" first level per spec, second level per type
+ "spec/group/type" first level per spec, second level per group, third level
+ per type
+ "type/spec" first level per type, second level per spec
+ "group/type/spec" first level per group, second level per type, third level
+ per spec
+ "group/spec/type" first level per group, second level per spec, third level
+ per type
+
+ Last level contains the actual list of anomalies.
+
+ Note: an anomaly always has a "type". Related anomaly types are grouped in an
+ anomaly "group". For example, "brokenLinks" and "datedUrls" both belong to
+ the "backrefs" group (also see the --what option).
--tr
Useful for Strudy to refine its broken link analysis when crawl report
@@ -159,113 +164,199 @@ Usage notes for some of the options:
version lags behind the Editor's Draft may have issues of the form "The term
exists in the /TR version but no longer exists in the Editor's Draft".
- Note that if is a link to a folder, the tool will automatically look
+ Note that if <report> is a link to a folder, the tool will automatically look
for the TR crawl report in a "tr" subfolder and set itself.
+
+--update-mode
+ Tell Strudy what issue files to update when --issues is set and an issue file
+ already exists for the issue at hand. Possible values are:
+ "new" (default) preserve existing files
+ "old" preserve existing files but get rid of old ones for which
+ study reveals no more issue
+ "untracked" update existing files that do not have a "Tracked" URL
+ "tracked" update existing files that have a "Tracked" URL
+ "all" update all existing files, deleting them when needed
+
+ Strudy will always create new issue files; the mode only changes the behavior
+ for existing issue files.
+
+ The --issues option must be set.
+
+-w, --what
+ Tell Strudy which anomalies to analyze. Values can be the names of anomaly
+ types or the names of anomaly groups. The value "all" (default) tells Strudy
+ to analyze and report on all possible anomalies.
+
+ The list of anomaly types and groups will likely evolve over time; see the
+ actual list in src/lib/study.js.
+
+ Examples:
+ "-w algorithms -w backrefs" to study algorithms and references to other specs
+ "-w unknownSpecs" to study links to unknown specs
`)
.action(async (report, options) => {
- if (options.format && !['json', 'markdown', 'html'].includes(options.format)) {
+ // Check options
+ if (options.format && !['json', 'markdown'].includes(options.format)) {
console.error(`Unsupported --format option "${options.format}".
-Format must be one of "json", "markdown" or "html".`)
+Format must be one of "json" or "markdown".`)
process.exit(2);
}
- if (options.diff && options.format && (options.format !== 'markdown')) {
- console.error(`Diff reports are always in markdown.
-The --format option can only be set to "markdown" when --diff is used.`);
+ if (options.format !== 'markdown' && options.issues) {
+ console.error(`The --format option can only be set to "markdown" when --issues is used.`);
process.exit(2);
}
- if (options.diff && options.perissue) {
- console.error('The --diff and --perissue options cannot both be set.');
+ if (options.updateMode && !['new', 'old', 'untracked', 'tracked', 'all'].includes(options.updateMode)) {
+ console.error(`Unsupported --update-mode option "${options.updateMode}"`);
process.exit(2);
}
- if (options.perissue && !['markdown', 'html'].includes(options.format)) {
- console.error('The --format option must be "markdown" or "html" when --perissue is set.')
+ if (options.updateMode !== 'new' && !options.issues) {
+ console.error('The --update-mode option can only be set when --issues is set');
process.exit(2);
}
- if (options.dep && options.diff) {
- console.error('The --dep and --diff options cannot both be set.');
+ if (options.issues && !await exists(options.issues)) {
+ console.error(`Could not find/access the folder to store anomalies: ${options.issues}`)
process.exit(2);
}
- let edReport = report;
- let trReport = options.tr;
+ // Load (and expand) the crawl results
+ let edReportFile = report;
+ let trReportFile = options.tr;
if (!report.endsWith('.json')) {
if (await exists(path.join(report, 'ed'))) {
- edReport = path.join(report, 'ed');
- if (!trReport && await exists(path.join(report, 'tr'))) {
- trReport = path.join(report, 'tr');
+ edReportFile = path.join(report, 'ed');
+ if (!trReportFile && await exists(path.join(report, 'tr'))) {
+ trReportFile = path.join(report, 'tr');
}
}
- edReport = path.join(edReport, 'index.json');
+ edReportFile = path.join(edReportFile, 'index.json');
}
- if (!await exists(edReport)) {
+ if (!await exists(edReportFile)) {
console.error(`Could not find/access crawl/study report: ${report}`);
process.exit(2);
}
- if (trReport) {
- if (!trReport.endsWith('.json')) {
- trReport = path.join(trReport, 'index.json');
+ if (trReportFile) {
+ if (!trReportFile.endsWith('.json')) {
+ trReportFile = path.join(trReportFile, 'index.json');
}
- if (!await exists(trReport)) {
+ if (!await exists(trReportFile)) {
console.error(`Could not find/access TR crawl report: ${options.tr}`);
process.exit(2);
}
}
- // Specified report may already be the study report
- // To find out, we'll do a bit of content sniffing to avoid loading the
- // report twice (report file may be somewhat large).
- let study = null;
- const isStudy = await isStudyReport(edReport);
- if (isStudy) {
- study = await loadJSON(edReport);
+ let edReport = await loadJSON(edReportFile);
+ edReport = await expandCrawlResult(edReport, path.dirname(edReportFile));
+
+ let trReport;
+ if (trReportFile) {
+ trReport = await loadJSON(trReportFile);
+ trReport = await expandCrawlResult(trReport, path.dirname(trReportFile));
}
- if (!study) {
- const studyOptions = {
- include: options.spec ?? null,
- trResults: trReport
+ // Create a structured anomaly report out of the crawl report
+ const anomaliesReport = await study(edReport.results, {
+ what: options.what,
+ structure: options.structure,
+ format: options.format === 'json' ?
+ 'json' :
+ (options.issues ? 'issue' : 'full'),
+ trResults: trReport?.results ?? [],
+ specs: options.spec
+ });
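+  // With the "issue" format, each result maps to one issue file to write
+  // (name, title, content, spec); with the "full" format, everything is
+  // folded into a single first-level result.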
+
+ // Output the structured anomaly report
+ if (options.format === 'json') {
+ // Caller wants a JSON report. We'll just trim the number of anomalies
+ // in the first level to the requested maximum as needed
+ if (options.max > 0) {
+ anomaliesReport.results = anomaliesReport.results.slice(0, options.max);
}
- study = await studyCrawl(edReport, studyOptions);
+ console.log(JSON.stringify(anomaliesReport, null, 2));
}
+ else if (options.issues) {
+ // Caller wants to add/update issue files in the provided folder.
+ // Issue files are formatted with the gray-matter library to save useful
+ // metadata as front matter in the file.
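+    // The written files look roughly like:
+    //   ---
+    //   Title: <anomaly title>
+    //   Tracked: <URL of the tracking issue, or N/A>
+    //   Repo: <spec repository, when known>
+    //   ---
+    //   <anomaly content and Strudy attribution>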
+ let reported = 0;
+ for (const entry of anomaliesReport.results) {
+ const filename = path.join(options.issues, `${entry.name}.md`);
+ let existingReport;
+ let tracked = 'N/A';
+ if (await exists(filename)) {
+ if (options.updateMode === 'new' ||
+ options.updateMode === 'old') {
+ console.warn(`- skip ${filename}, file already exists`);
+ continue;
+ }
+ existingReport = matter(await fs.readFile(filename, 'utf-8'));
+ tracked = existingReport.data.Tracked ?? 'N/A';
+ if ((options.updateMode === 'tracked' && tracked === 'N/A') ||
+ (options.updateMode === 'untracked' && tracked !== 'N/A')) {
+ console.warn(`- skip ${filename}, file already exists, with Tracked="${tracked}"`);
+ continue;
+ }
+ }
- let res = null;
- if (options.diff || options.dep) {
- // Generate diff/dependencies report
- res = await generateReport(study, {
- depReport: options.dep,
- diffReport: !!options.diff,
- refStudyFile: options.diff,
- onlyNew: options.onlynew
- });
- }
- else if (options.format && options.format !== 'json') {
- // Generate markdown report and possibly an HTML report
- const generateOptions = { perSpec: !options.perissue };
- const markdown = await generateReport(study, generateOptions);
-
- if (options.format === 'html') {
- const template = path.join(__dirname, 'src', 'templates',
- `report${options.perissue ? '-perissue' : ''}-template.html`);
- const promise = new Promise((resolve, reject) => {
- let args = [
- '-f', 'markdown', '-t', 'html5', '--section-divs', '-s',
- '--template', template
- ];
- pandoc(markdown, args, (err, result) =>
- err ? reject(err) : resolve(result));
- });
- res = await promise;
+ const content = `
+${entry.content}
+
+This issue was detected and reported semi-automatically by [Strudy](https://github.com/w3c/strudy/) based on data collected in [webref](https://github.com/w3c/webref/).`;
+ // Note from @tidoust: One day, I'll understand how to set up Git and
+ // code so that all line endings end up being "\n" even on Windows
+ // machines. In the meantime, note that local issue files may well
+ // contain "\r\n" on Windows machines.
+ if (existingReport?.content.replace(/\r\n/g, '\n').trim() === content.trim()) {
+ console.warn(`- skip ${filename}, file already exists, no change`);
+ continue;
+ }
+
+ const issueReport = matter(content);
+ issueReport.data = {
+ Title: entry.title,
+ Tracked: tracked
+ };
+ if (entry.spec) {
+        const spec = edReport.results.find(s => s.url === entry.spec.url);
+        if (spec?.nightly?.repository) {
+ issueReport.data.Repo = spec.nightly.repository;
+ }
+ }
+ console.warn(`- ${existingReport ? 'update' : 'add'} ${filename}`);
+      const fileContent = issueReport.stringify();
+      await fs.writeFile(filename, fileContent, 'utf-8');
+ reported += 1;
+ if (options.max > 0 && reported >= options.max) {
+ break;
+ }
}
- else {
- res = markdown;
+
+ if (options.updateMode === 'old' ||
+ options.updateMode === 'all') {
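+    // `looksGood` lists the shortnames of specs for which no anomaly was
+    // detected, so their issue files are now obsolete and can be removed.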
+ const reportFiles = await fs.readdir(options.issues);
+    const toDelete = reportFiles.filter(file =>
+      anomaliesReport.looksGood.some(name => file === `${name}.md`));
+    for (const file of toDelete) {
+ const filename = path.join(options.issues, file);
+ console.warn(`- delete ${filename}, no more anomalies detected`);
+ await fs.rm(filename, { force: true });
+ }
}
}
else {
- // Output the study report to the console
- res = JSON.stringify(study, null, 2);
+ // Caller wants a markdown report written to the console.
+ // The anomalies report should already be a "full" one (so only one
+ // result item at the first level).
+ const content = anomaliesReport.results[0].content;
+ let reported = 0;
+ for (const entry of content) {
+ console.log(entry);
+ console.log();
+ reported += 1;
+ if (options.max > 0 && reported >= options.max) {
+ break;
+ }
+ }
}
-
- console.log(res);
});
program.parseAsync(process.argv);
diff --git a/test/cli.js b/test/cli.js
index a48b3c2d..4c6ac976 100644
--- a/test/cli.js
+++ b/test/cli.js
@@ -26,19 +26,51 @@ describe(`Strudy's CLI`, function () {
it('reports usage help when asked', async function () {
const { stdout, stderr } = await strudy(`--help`);
- assert.match(stdout, /^Usage: strudy \[options\] /);
+ assert.match(stdout, /^Usage: strudy \[options\] \[command\]/);
assert.deepEqual(stderr, '');
});
- it('expects a report argument', async function () {
- const { stdout, stderr } = await strudy(``);
- assert.match(stderr, /error: missing required argument 'report'/);
- assert.deepEqual(stdout, '');
- });
+ describe(`The "inspect" command`, function () {
+ it('expects a crawl report as argument', async function () {
+ const { stdout, stderr } = await strudy(`inspect`);
+ assert.match(stderr, /error: missing required argument 'crawl'/);
+ assert.deepEqual(stdout, '');
+ });
+
+ it('reports an error when provided crawl report does not exist', async function () {
+ const { stdout, stderr } = await strudy(`inspect notareport`);
+ assert.match(stderr, /Could not find/);
+ assert.deepEqual(stdout, '');
+ });
+
+ it('reports an error when provided issues folder does not exist', async function () {
+ const { stdout, stderr } = await strudy(`inspect test/data/empty.json --issues notafolder`);
+ assert.match(stderr, /Could not find\/access the folder to store anomalies/);
+ assert.deepEqual(stdout, '');
+ });
+
+ it('refuses formats other than "json" or "markdown"', async function () {
+ const { stdout, stderr } = await strudy(`inspect test/data/empty.json --format html`);
+ assert.match(stderr, /Unsupported --format option/);
+ assert.deepEqual(stdout, '');
+ });
+
+ it('rejects incompatible format and issues options', async function () {
+ const { stdout, stderr } = await strudy(`inspect test/data/empty.json --format json --issues issues`);
+ assert.match(stderr, /The --format option can only be set to "markdown" when --issues is used/);
+ assert.deepEqual(stdout, '');
+ });
+
+ it('reports an error when update-mode is set but not the issues option', async function () {
+ const { stdout, stderr } = await strudy(`inspect test/data/empty.json --update-mode all`);
+ assert.match(stderr, /The --update-mode option can only be set when --issues is set/);
+ assert.deepEqual(stdout, '');
+ });
- it('reports an error when provided report does not exist', async function () {
- const { stdout, stderr } = await strudy(`notareport`);
- assert.match(stderr, /Could not find/);
- assert.deepEqual(stdout, '');
+ it('reports an error when update-mode is set to some unknown mode', async function () {
+ const { stdout, stderr } = await strudy(`inspect test/data/empty.json --issues issues --update-mode notamode`);
+ assert.match(stderr, /Unsupported --update-mode option/);
+ assert.deepEqual(stdout, '');
+    });
});
});
\ No newline at end of file
diff --git a/test/data/empty.json b/test/data/empty.json
new file mode 100644
index 00000000..914332ed
--- /dev/null
+++ b/test/data/empty.json
@@ -0,0 +1,3 @@
+{
+ "results": []
+}
\ No newline at end of file
diff --git a/test/study-algorithms.js b/test/study-algorithms.js
new file mode 100644
index 00000000..c5b19888
--- /dev/null
+++ b/test/study-algorithms.js
@@ -0,0 +1,37 @@
+import study from '../src/lib/study-algorithms.js';
+import { assertNbAnomalies, assertAnomaly } from './util.js';
+
+describe('The algorithms analyser', () => {
+ const specUrl = 'https://www.w3.org/TR/spec';
+ const specUrl2 = 'https://www.w3.org/TR/spec2';
+
+ function toCrawlResult(algorithms) {
+ return [{ url: specUrl, algorithms }];
+ }
+
+ it('reports no anomaly if there are no algorithms', () => {
+ const crawlResult = toCrawlResult([]);
+ const report = study(crawlResult);
+ assertNbAnomalies(report, 0);
+ });
+
+ it('reports an error when a step resolves a promise in parallel', () => {
+ const crawlResult = toCrawlResult([
+ {
+        html: 'The encodingInfo() method MUST run the following steps:',
+ rationale: 'if',
+ steps: [
+ { html: 'Let p be a new promise.' },
+ { html: 'In parallel, run the Create a MediaCapabilitiesEncodingInfo algorithm with configuration and resolve p with its result.' },
+ { html: 'Return p.' }
+ ]
+ }
+ ]);
+ const report = study(crawlResult);
+ assertAnomaly(report, 0, {
+ name: 'missingTaskForPromise',
+ message: 'The algorithm that starts with "The encodingInfo() method MUST run the following steps:" has a parallel step that resolves/rejects a promise directly',
+ spec: { url: 'https://www.w3.org/TR/spec' }
+ });
+ });
+});
\ No newline at end of file
diff --git a/test/study-backrefs.js b/test/study-backrefs.js
index ea755137..8e6e4eac 100644
--- a/test/study-backrefs.js
+++ b/test/study-backrefs.js
@@ -3,7 +3,7 @@
*/
/* global describe, it */
-import studyBackrefs from '../src/lib/study-backrefs.js';
+import study from '../src/lib/study-backrefs.js';
import { assertNbAnomalies, assertAnomaly } from './util.js';
const specEdUrl = 'https://w3c.github.io/spec/';
@@ -48,28 +48,33 @@ const populateSpec = (url, ids, links, dfns) => {
function toCrawlResults (ids, links, trIds = ids) {
return {
- ed: [populateSpec(specEdUrl, toFullIds(specEdUrl, ids), []),
- populateSpec(specEdUrl2, [], toLinks(specEdUrl, links))],
- tr: [populateSpec(specEdUrl, toFullIds(specEdUrl, trIds), [])]
+ ed: [
+ populateSpec(specEdUrl, toFullIds(specEdUrl, ids), []),
+ populateSpec(specEdUrl2, [], toLinks(specEdUrl, links))
+ ],
+ tr: [
+ populateSpec(specEdUrl, toFullIds(specEdUrl, trIds), [])
+ ]
};
}
describe('The links analyser', () => {
- it('reports no anomaly if links are valid', () => {
+ it('reports no anomaly if links are valid', async () => {
const ids = ['validid'];
const crawlResult = toCrawlResults(ids, ids);
- const report = studyBackrefs(crawlResult.ed, crawlResult.tr);
+ const report = await study(crawlResult.ed, { htmlFragments: {} });
assertNbAnomalies(report, 0);
});
- it('reports a broken link', () => {
+ it('reports a broken link', async () => {
const ids = ['validid'];
const crawlResult = toCrawlResults([], ids);
- const report = studyBackrefs(crawlResult.ed, crawlResult.tr);
+ const report = await study(crawlResult.ed, { htmlFragments: {} });
assertNbAnomalies(report, 1);
assertAnomaly(report, 0, {
- category: 'links',
- message: specEdUrl + '#' + ids[0]
+ name: 'brokenLinks',
+ message: specEdUrl + '#' + ids[0],
+ spec: { url: 'https://www.w3.org/TR/spec2/' }
});
});
diff --git a/test/study-dfns.js b/test/study-dfns.js
new file mode 100644
index 00000000..b656c52c
--- /dev/null
+++ b/test/study-dfns.js
@@ -0,0 +1,39 @@
+import studyDefinitions from '../src/lib/study-dfns.js';
+import { assertNbAnomalies, assertAnomaly } from './util.js';
+
+describe('The definitions analyser', () => {
+ const specUrl = 'https://www.w3.org/TR/spec';
+ const specUrl2 = 'https://www.w3.org/TR/spec2';
+
+ function toCrawlResult({ css = {}, dfns = [], idlparsed = {} }) {
+ const crawlResult = [{
+ url: specUrl,
+ css, dfns, idlparsed
+ }];
+ return crawlResult;
+ }
+
+ it('reports no anomaly if there are no definitions', () => {
+ const crawlResult = toCrawlResult({});
+ const report = studyDefinitions(crawlResult);
+ assertNbAnomalies(report, 0);
+ });
+
+ it('reports missing definition anomalies from CSS extracts', () => {
+ const crawlResult = toCrawlResult({
+ css: {
+ warnings: [{
+ msg: 'Missing definition',
+ name: 'no-def',
+ type: 'value'
+ }]
+ }
+ });
+ const report = studyDefinitions(crawlResult);
+ assertAnomaly(report, 0, {
+ name: 'missingDfns',
+ message: '`no-def` with type `value`',
+ spec: { url: 'https://www.w3.org/TR/spec' }
+ });
+ });
+});
\ No newline at end of file
diff --git a/test/study-refs.js b/test/study-refs.js
index 11a9db18..3250e183 100644
--- a/test/study-refs.js
+++ b/test/study-refs.js
@@ -3,7 +3,7 @@
*/
/* global describe, it */
-import studyReferences from '../src/lib/study-refs.js';
+import study from '../src/lib/study-refs.js';
import { assertNbAnomalies, assertAnomaly } from './util.js';
const specEdUrl = 'https://w3c.github.io/spec/';
@@ -14,6 +14,9 @@ function toRefs (name, url) {
return [ {name, url} ];
}
+const toTr = url => url.replace(
+ 'https://w3c.github.io',
+ 'https://www.w3.org/TR');
const populateSpec = (url, refs = [], standing = "good", obsoletedBy) => {
const shortname = url.slice(0, -1).split('/').pop();
@@ -25,6 +28,9 @@ const populateSpec = (url, refs = [], standing = "good", obsoletedBy) => {
nightly: {
url
},
+ release: {
+ url: toTr(url)
+ },
shortname,
standing,
obsoletedBy
@@ -39,31 +45,62 @@ function toEdCrawlResults (standing = "good", replacements) {
];
}
-describe('The reference analyser', () => {
+describe('The references analyser', () => {
it('reports no anomaly if references are not discontinued', () => {
const crawlResult = toEdCrawlResults();
- const report = studyReferences(crawlResult);
+ const report = study(crawlResult);
assertNbAnomalies(report, 0);
});
it('reports a discontinued reference with a replacement', () => {
const crawlResult = toEdCrawlResults("discontinued", ["spec3"]);
- const report = studyReferences(crawlResult);
+ const report = study(crawlResult);
assertNbAnomalies(report, 1);
assertAnomaly(report, 0, {
- category: 'refs',
- message: /spec3/
+ name: 'discontinuedReferences',
+ message: /spec3/,
+ spec: { url: specEdUrl }
});
});
it('reports a discontinued reference without a replacement', () => {
const crawlResult = toEdCrawlResults("discontinued");
- const report = studyReferences(crawlResult);
+ const report = study(crawlResult);
+ assertNbAnomalies(report, 1);
+ assertAnomaly(report, 0, {
+ name: 'discontinuedReferences',
+ message: /no known replacement/,
+ spec: { url: specEdUrl }
+ });
+ });
+
+ it('reports a missing reference', () => {
+ const spec = populateSpec(specEdUrl);
+ spec.links = { rawlinks: {} };
+ spec.links.rawlinks[specEdUrl2] = {};
+ const crawlResult = [spec];
+ const report = study(crawlResult);
assertNbAnomalies(report, 1);
assertAnomaly(report, 0, {
- category: 'refs',
- message: /no known replacement/
+ name: 'missingReferences',
+ message: specEdUrl2,
+ spec: { url: specEdUrl }
});
});
+ it('reports an inconsistent reference', () => {
+ const spec = populateSpec(specEdUrl, toRefs('spec2', toTr(specEdUrl2)));
+ spec.links = { rawlinks: {} };
+ spec.links.rawlinks[specEdUrl2] = {};
+ const spec2 = populateSpec(specEdUrl2);
+ spec2.versions = [toTr(specEdUrl2)];
+ const crawlResult = [spec, spec2];
+ const report = study(crawlResult);
+ assertNbAnomalies(report, 1);
+ assertAnomaly(report, 0, {
+ name: 'inconsistentReferences',
+ message: `${specEdUrl2}, related reference "spec2" uses URL ${toTr(specEdUrl2)}`,
+ spec: { url: specEdUrl }
+ });
+ });
});
diff --git a/test/study-webidl.js b/test/study-webidl.js
index 4660b0e0..52e13a3e 100644
--- a/test/study-webidl.js
+++ b/test/study-webidl.js
@@ -4,7 +4,7 @@
*/
/* global describe, it */
-import studyWebIdl from '../src/lib/study-webidl.js';
+import study from '../src/lib/study-webidl.js';
import { assertNbAnomalies, assertAnomaly } from './util.js';
describe('The Web IDL analyser', () => {
@@ -21,7 +21,7 @@ describe('The Web IDL analyser', () => {
function analyzeIdl (idl, idlSpec2) {
const crawlResult = toCrawlResult(idl, idlSpec2);
- return studyWebIdl(crawlResult);
+ return study(crawlResult);
}
it('reports no anomaly if IDL is valid', () => {
@@ -86,12 +86,11 @@ interface Invalid;
`);
assertNbAnomalies(report, 1);
assertAnomaly(report, 0, {
- category: 'webidl',
name: 'invalid',
message: `Syntax error at line 3, since \`interface Invalid\`:
interface Invalid;
^ Bodyless interface`,
- specs: [{ url: specUrl }]
+ spec: { url: specUrl }
});
});
@@ -105,11 +104,11 @@ interface Invalid;
[Global=Window,Exposed=*]
interface Valid: Invalid {};
`);
- const curatedResult = toCrawlResult(`
+ const curatedResults = toCrawlResult(`
[Global=Window,Exposed=*]
interface Invalid{};
`);
- const report = studyWebIdl(crawlResult, curatedResult);
+ const report = study(crawlResult, { curatedResults });
assertNbAnomalies(report, 1);
assertAnomaly(report, 0, { name: 'invalid' });
});
diff --git a/test/study.js b/test/study.js
new file mode 100644
index 00000000..30c09386
--- /dev/null
+++ b/test/study.js
@@ -0,0 +1,169 @@
+import study from '../src/lib/study.js';
+import { assertNbAnomalies, assertAnomaly } from './util.js';
+
+const specUrl = 'https://w3c.github.io/world/';
+const specUrl2 = 'https://w3c.github.io/universe/';
+
+function toTr(url) {
+ return url.replace('https://w3c.github.io', 'https://www.w3.org/TR');
+}
+
+function populateSpec(url, crawl) {
+ const shortname = url.slice(0, -1).split('/').pop();
+ const spec = Object.assign({
+ shortname,
+ title: `Hello ${shortname} API`,
+ url: toTr(url),
+ nightly: { url },
+ release: { url: toTr(url) },
+ crawled: url
+ }, crawl);
+ return spec;
+}
+
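+// A note on the `structure` strings exercised below: they read as
+// slash-separated nesting levels (first segment = what each report entry
+// covers, the rest = how its content is grouped), with "+" combining two
+// dimensions in a single entry title, e.g. "group+spec/type".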
+describe('The main study function', function () {
+ this.slow(5000);
+ this.timeout(10000);
+
+ it('reports no anomaly when spec is empty', async function() {
+ const crawlResult = [{ url: specUrl }];
+ const report = await study(crawlResult, { htmlFragments: {} });
+ assertNbAnomalies(report.results, 0);
+ });
+
+ it('reports anomalies per type and spec by default', async function() {
+ const crawlResult = [
+ populateSpec(specUrl, { error: 'Boo' }),
+ populateSpec(specUrl2, { error: 'Borked' })
+ ];
+ const report = await study(crawlResult, { htmlFragments: {} });
+ assertNbAnomalies(report.results, 2);
+ assertAnomaly(report.results, 0, {
+ title: 'Crawl error in Hello world API',
+ content:
+`While crawling [Hello world API](${specUrl}), the following crawl errors occurred:
+* [ ] Boo`
+ });
+ assertAnomaly(report.results, 1, {
+ title: 'Crawl error in Hello universe API',
+ content:
+`While crawling [Hello universe API](${specUrl2}), the following crawl errors occurred:
+* [ ] Borked`
+ });
+ });
+
+ it('reports anomalies per type when asked', async function() {
+ const crawlResult = [
+ populateSpec(specUrl, { error: 'Boo' }),
+ populateSpec(specUrl2, { error: 'Borked' })
+ ];
+ const report = await study(crawlResult, { structure: 'type/spec', htmlFragments: {} });
+ assertNbAnomalies(report.results, 1);
+ assertAnomaly(report.results, 0, {
+ title: 'Crawl error',
+ content:
+`The following crawl errors occurred:
+* [Hello world API](https://w3c.github.io/world/)
+ * [ ] Boo
+* [Hello universe API](https://w3c.github.io/universe/)
+ * [ ] Borked`
+ });
+ });
+
+ it('reports anomalies per spec when asked', async function() {
+ const crawlResult = [
+ populateSpec(specUrl, { error: 'Boo' }),
+ populateSpec(specUrl2, { error: 'Borked' })
+ ];
+ const report = await study(crawlResult, { structure: 'spec/type', htmlFragments: {} });
+ assertNbAnomalies(report.results, 2);
+ assertAnomaly(report.results, 0, {
+ title: 'Hello world API',
+ content:
+`While crawling [Hello world API](https://w3c.github.io/world/), the following anomalies were identified:
+* Crawl error
+ * [ ] Boo`
+ });
+ });
+
+ it('reports anomalies per spec and groups anomalies when asked', async function() {
+ const crawlResult = [
+ populateSpec(specUrl, { error: 'Boo' }),
+ populateSpec(specUrl2, { error: 'Borked' })
+ ];
+ const report = await study(crawlResult, { structure: 'spec/group/type', htmlFragments: {} });
+ assertNbAnomalies(report.results, 2);
+ assertAnomaly(report.results, 0, {
+ title: 'Hello world API',
+ content:
+`While crawling [Hello world API](https://w3c.github.io/world/), the following anomalies were identified:
+* Generic
+ * Crawl error
+ * [ ] Boo`
+ });
+ });
+
+ it('reports anomalies per group and spec when asked', async function() {
+ const crawlResult = [
+ populateSpec(specUrl, { error: 'Boo' }),
+ populateSpec(specUrl2, { error: 'Borked' })
+ ];
+ const report = await study(crawlResult, { structure: 'group+spec/type', htmlFragments: {} });
+ assertNbAnomalies(report.results, 2);
+ assertAnomaly(report.results, 0, {
+ title: 'Generic in Hello world API',
+ content:
+`While crawling [Hello world API](https://w3c.github.io/world/), the following errors prevented the spec from being analyzed:
+* Crawl error
+ * [ ] Boo`
+ });
+ });
+
+ it('reports anomalies per group, with anomaly type as intermediary level, when asked', async function() {
+ const crawlResult = [
+ populateSpec(specUrl, { error: 'Boo' }),
+ populateSpec(specUrl2, { error: 'Borked' })
+ ];
+ const report = await study(crawlResult, { structure: 'group/type/spec', htmlFragments: {} });
+ assertNbAnomalies(report.results, 1);
+ assertAnomaly(report.results, 0, {
+ title: 'Generic',
+ content:
+`The following errors prevented the spec from being analyzed:
+* Crawl error
+ * [Hello world API](https://w3c.github.io/world/)
+ * [ ] Boo
+ * [Hello universe API](https://w3c.github.io/universe/)
+ * [ ] Borked`
+ });
+ });
+
+ it('reports anomalies per group, with spec as intermediary level, when asked', async function() {
+ const crawlResult = [
+ populateSpec(specUrl, { error: 'Boo' }),
+ populateSpec(specUrl2, { error: 'Borked' })
+ ];
+ const report = await study(crawlResult, { structure: 'group/spec/type', htmlFragments: {} });
+ assertNbAnomalies(report.results, 1);
+ assertAnomaly(report.results, 0, {
+ title: 'Generic',
+ content:
+`The following errors prevented the spec from being analyzed:
+* [Hello world API](https://w3c.github.io/world/)
+ * Crawl error
+ * [ ] Boo
+* [Hello universe API](https://w3c.github.io/universe/)
+ * Crawl error
+ * [ ] Borked`
+ });
+ });
+
+ it('only reports anomalies for requested specs', async function() {
+ const crawlResult = [
+ populateSpec(specUrl, { error: 'Boo' }),
+ populateSpec(specUrl2, { error: 'Borked' })
+ ];
+ const report = await study(crawlResult, { specs: ['universe'], htmlFragments: {} });
+ assertNbAnomalies(report.results, 1);
+ });
+});
\ No newline at end of file