From 6dc3776b54f14bf70126ecad20b25662fb3fe0a7 Mon Sep 17 00:00:00 2001 From: Romain Deltour Date: Sun, 23 Jan 2022 23:42:31 +0100 Subject: [PATCH] feat: allow SVG/MathML doctype declarations EPUB 3.3 now allows a reserved set of external identifiers in doctype declarations of documents with select media types. See: https://www.w3.org/TR/epub-33/#app-identifiers-allowed This commit: - adds those as special cases to the XML parser code - totally removes entity fetching for EPUB 3.3 - keeps forbidding external entities in the internal subset Fix #1192, Fix #1114 --- .../com/adobe/epubcheck/xml/XMLParser.java | 31 ++++++++++++++----- .../EPUB/content_001.xhtml | 11 +++++++ .../EPUB/mathml-mediatype-1.xml | 15 +++++++++ .../EPUB/mathml-mediatype-2.xml | 15 +++++++++ .../EPUB/mathml-mediatype-3.xml | 15 +++++++++ .../EPUB/nav.xhtml | 0 .../EPUB/package.opf | 24 ++++++++++++++ .../EPUB/svg.svg | 7 +++++ .../META-INF/container.xml | 6 ++++ .../mimetype | 0 .../EPUB/content_001.xhtml | 11 +++++++ .../EPUB/nav.xhtml | 14 +++++++++ .../EPUB/package.opf | 0 .../EPUB/toc.ncx | 24 ++++++++++++++ .../META-INF/container.xml | 6 ++++ .../mimetype | 1 + .../EPUB/content_001.xhtml | 0 .../EPUB/nav.xhtml | 14 +++++++++ .../EPUB/package.opf | 17 ++++++++++ .../EPUB/toc.ncx | 0 .../META-INF/container.xml | 0 .../mimetype | 1 + .../epub3/resources-publication.feature | 13 ++++++-- 23 files changed, 215 insertions(+), 10 deletions(-) create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/content_001.xhtml create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-1.xml create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-2.xml create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-3.xml rename src/test/resources/epub3/files/epub/{xml-ncx-doctype-external-identifier-error 
=> xml-external-identifier-allowed-valid}/EPUB/nav.xhtml (100%) create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/package.opf create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/svg.svg create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/META-INF/container.xml rename src/test/resources/epub3/files/epub/{xml-ncx-doctype-external-identifier-error => xml-external-identifier-allowed-valid}/mimetype (100%) create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/content_001.xhtml create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/nav.xhtml rename src/test/resources/epub3/files/epub/{xml-ncx-doctype-external-identifier-error => xml-external-identifier-bad-mediatype-error}/EPUB/package.opf (100%) create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/toc.ncx create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/META-INF/container.xml create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/mimetype rename src/test/resources/epub3/files/epub/{xml-ncx-doctype-external-identifier-error => xml-external-identifier-disallowed-error}/EPUB/content_001.xhtml (100%) create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/nav.xhtml create mode 100644 src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/package.opf rename src/test/resources/epub3/files/epub/{xml-ncx-doctype-external-identifier-error => xml-external-identifier-disallowed-error}/EPUB/toc.ncx (100%) rename src/test/resources/epub3/files/epub/{xml-ncx-doctype-external-identifier-error => xml-external-identifier-disallowed-error}/META-INF/container.xml (100%) create mode 100644 
src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/mimetype diff --git a/src/main/java/com/adobe/epubcheck/xml/XMLParser.java b/src/main/java/com/adobe/epubcheck/xml/XMLParser.java index a2834802d..a752640fa 100755 --- a/src/main/java/com/adobe/epubcheck/xml/XMLParser.java +++ b/src/main/java/com/adobe/epubcheck/xml/XMLParser.java @@ -313,7 +313,11 @@ public InputSource resolveEntity(String publicId, String systemId) String resourcePath = systemIdMap.get(systemId); - if (resourcePath != null) + // external entities are not resolved in EPUB 3 + if (context.version == EPUBVersion.VERSION_3 || systemId.equals("about:legacy-compat")) { + return new InputSource(new StringReader("")); + } + else if (resourcePath != null) { InputStream resourceStream = ResourceUtil.getResourceStream(resourcePath); InputSource source = new InputSource(resourceStream); @@ -321,12 +325,6 @@ public InputSource resolveEntity(String publicId, String systemId) source.setSystemId(systemId); return source; } - else if (systemId.equals("about:legacy-compat")) - { - // special case - return new InputSource(new StringReader("")); - - } else { // check for a system prop that turns off online fetching @@ -797,7 +795,24 @@ else if (context.version == EPUBVersion.VERSION_3) } else if (publicId != null || systemId != null) { - report.message(MessageId.OPF_073, getLocation()); + // check if the declaration is allowed for the current media type + boolean isAllowed; + switch (mimeType) + { + case "image/svg+xml": + isAllowed = "-//W3C//DTD SVG 1.1//EN".equals(publicId) && "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd".equals(systemId); + break; + case "application/mathml+xml": + case "application/mathml-content+xml": + case "application/mathml-presentation+xml": + isAllowed = "-//W3C//DTD MathML 3.0//EN".equals(publicId) && "http://www.w3.org/Math/DTD/mathml3/mathml3.dtd".equals(systemId); + break; + default: + isAllowed= false; + } + if (!isAllowed) { + 
report.message(MessageId.OPF_073, getLocation()); + } } } diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/content_001.xhtml b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/content_001.xhtml new file mode 100644 index 000000000..ea29a1610 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/content_001.xhtml @@ -0,0 +1,11 @@ + + + + + Minimal EPUB + + +

Loomings

+

Call me Ishmael.

+ + diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-1.xml b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-1.xml new file mode 100644 index 000000000..0cb590364 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-1.xml @@ -0,0 +1,15 @@ + + + + + 2 + + x + + + + + y + - + z + + diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-2.xml b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-2.xml new file mode 100644 index 000000000..0cb590364 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-2.xml @@ -0,0 +1,15 @@ + + + + + 2 + + x + + + + + y + - + z + + diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-3.xml b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-3.xml new file mode 100644 index 000000000..0cb590364 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/mathml-mediatype-3.xml @@ -0,0 +1,15 @@ + + + + + 2 + + x + + + + + y + - + z + + diff --git a/src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/EPUB/nav.xhtml b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/nav.xhtml similarity index 100% rename from src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/EPUB/nav.xhtml rename to src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/nav.xhtml diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/package.opf b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/package.opf new file mode 100644 index 
000000000..f89109202 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/package.opf @@ -0,0 +1,24 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/svg.svg b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/svg.svg new file mode 100644 index 000000000..8f898041e --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/EPUB/svg.svg @@ -0,0 +1,7 @@ + + + + Test SVG document + Rectangle + + diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/META-INF/container.xml b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/META-INF/container.xml new file mode 100644 index 000000000..318782179 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/META-INF/container.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/mimetype b/src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/mimetype similarity index 100% rename from src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/mimetype rename to src/test/resources/epub3/files/epub/xml-external-identifier-allowed-valid/mimetype diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/content_001.xhtml b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/content_001.xhtml new file mode 100644 index 000000000..18c223f36 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/content_001.xhtml @@ -0,0 +1,11 @@ + + + + + Minimal EPUB + + +

Loomings

+

Call me Ishmael.

+ + diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/nav.xhtml b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/nav.xhtml new file mode 100644 index 000000000..240745e63 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/nav.xhtml @@ -0,0 +1,14 @@ + + + + + Minimal Nav + + + + + diff --git a/src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/EPUB/package.opf b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/package.opf similarity index 100% rename from src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/EPUB/package.opf rename to src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/package.opf diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/toc.ncx b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/toc.ncx new file mode 100644 index 000000000..dfa0200ca --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/EPUB/toc.ncx @@ -0,0 +1,24 @@ + + + + + + + + + + + NCX + + + + + Chapter 1 + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/META-INF/container.xml b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/META-INF/container.xml new file mode 100644 index 000000000..318782179 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/META-INF/container.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/mimetype b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/mimetype new file mode 100644 index 000000000..57ef03f24 --- 
/dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-bad-mediatype-error/mimetype @@ -0,0 +1 @@ +application/epub+zip \ No newline at end of file diff --git a/src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/EPUB/content_001.xhtml b/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/content_001.xhtml similarity index 100% rename from src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/EPUB/content_001.xhtml rename to src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/content_001.xhtml diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/nav.xhtml b/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/nav.xhtml new file mode 100644 index 000000000..240745e63 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/nav.xhtml @@ -0,0 +1,14 @@ + + + + + Minimal Nav + + + + + diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/package.opf b/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/package.opf new file mode 100644 index 000000000..eb9450a35 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/package.opf @@ -0,0 +1,17 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/EPUB/toc.ncx b/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/toc.ncx similarity index 100% rename from src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/EPUB/toc.ncx rename to src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/EPUB/toc.ncx diff --git 
a/src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/META-INF/container.xml b/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/META-INF/container.xml similarity index 100% rename from src/test/resources/epub3/files/epub/xml-ncx-doctype-external-identifier-error/META-INF/container.xml rename to src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/META-INF/container.xml diff --git a/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/mimetype b/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/mimetype new file mode 100644 index 000000000..57ef03f24 --- /dev/null +++ b/src/test/resources/epub3/files/epub/xml-external-identifier-disallowed-error/mimetype @@ -0,0 +1 @@ +application/epub+zip \ No newline at end of file diff --git a/src/test/resources/epub3/resources-publication.feature b/src/test/resources/epub3/resources-publication.feature index b28bc5edf..eb15ffa3e 100644 --- a/src/test/resources/epub3/resources-publication.feature +++ b/src/test/resources/epub3/resources-publication.feature @@ -254,8 +254,17 @@ Feature: EPUB 3 ▸ Publication Resources ▸ Full Publication Checks ## 3.3 XML Conformance - Scenario: Report an NCX file with a DOCTYPE declaration including the external identifier (issue 305) - When checking EPUB 'xml-ncx-doctype-external-identifier-error' + Scenario: Verify DOCTYPE declarations with allowed external identifiers + When checking EPUB 'xml-external-identifier-allowed-valid' + Then no errors or warnings are reported + + Scenario: Report a DOCTYPE declaration with an allowed external identifier but not on the expected media type + When checking EPUB 'xml-external-identifier-bad-mediatype-error' + Then error OPF-073 is reported + And no other errors or warnings are reported + + Scenario: Report a DOCTYPE declaration with an external identifier that is not allowed + When checking EPUB 
'xml-external-identifier-disallowed-error' Then error OPF-073 is reported And no other errors or warnings are reported