From bec390ef355270f7de43cf22ed21e746651b9cad Mon Sep 17 00:00:00 2001 From: Romain Deltour Date: Wed, 16 Nov 2022 10:31:10 +0100 Subject: [PATCH] feat: better parse URL fragment micro syntaxes This commit introduces a new `URLFragment` class to represent URL fragments. Fragment strings are parsed into `URLFragment` instances using MIME type-specific logic, implementing some validity checks for a few micro syntaxes including: - shorthand bare name IDs - scheme-based fragments - media fragments SVG and HTML/XHTML MIME types are supported. The parser is tested in the `url-fragment.feature` feature file, in a new `unit-tests` directory. --- .../com/adobe/epubcheck/opf/OPFChecker.java | 3 + .../com/adobe/epubcheck/opf/OPFChecker30.java | 5 +- .../com/adobe/epubcheck/opf/XRefChecker.java | 69 ++- .../com/adobe/epubcheck/ops/OPSHandler.java | 5 - .../org/w3c/epubcheck/constants/MIMEType.java | 2 +- .../org/w3c/epubcheck/url/URLFragment.java | 410 ++++++++++++++++++ .../w3c/epubcheck/url/URLFragmentSteps.java | 67 +++ .../EPUB/package.opf | 2 +- .../resources/unit-tests/url-fragment.feature | 105 +++++ 9 files changed, 636 insertions(+), 32 deletions(-) create mode 100644 src/main/java/org/w3c/epubcheck/url/URLFragment.java create mode 100644 src/test/java/org/w3c/epubcheck/url/URLFragmentSteps.java create mode 100644 src/test/resources/unit-tests/url-fragment.feature diff --git a/src/main/java/com/adobe/epubcheck/opf/OPFChecker.java b/src/main/java/com/adobe/epubcheck/opf/OPFChecker.java index 72a34f234..06a560bfb 100755 --- a/src/main/java/com/adobe/epubcheck/opf/OPFChecker.java +++ b/src/main/java/com/adobe/epubcheck/opf/OPFChecker.java @@ -116,6 +116,9 @@ protected boolean checkPackage() List items = opfHandler.getItems(); report.info(null, FeatureEnum.ITEMS_COUNT, Integer.toString(items.size())); + + // Register package doc and items to the XRefChecker + xrefChecker.registerResource(context.url, context.mimeType); for (OPFItem item : items) { 
xrefChecker.registerResource(item, diff --git a/src/main/java/com/adobe/epubcheck/opf/OPFChecker30.java b/src/main/java/com/adobe/epubcheck/opf/OPFChecker30.java index 08681930f..f249a1193 100644 --- a/src/main/java/com/adobe/epubcheck/opf/OPFChecker30.java +++ b/src/main/java/com/adobe/epubcheck/opf/OPFChecker30.java @@ -25,6 +25,8 @@ import java.util.Iterator; import java.util.Set; +import org.w3c.epubcheck.url.URLFragment; + import com.adobe.epubcheck.api.EPUBLocation; import com.adobe.epubcheck.api.EPUBProfile; import com.adobe.epubcheck.api.FeatureReport.Feature; @@ -387,7 +389,8 @@ private void checkPreviewCollection(ResourceCollection collection) } else { - if (Optional.fromNullable(resource.getURL().fragment()).or("").startsWith("epubcfi(")) + URLFragment fragment = URLFragment.parse(resource.getURL()); + if (fragment.exists() && "epubcfi".equals(fragment.getScheme())) { report.message(MessageId.OPF_076, EPUBLocation.of(context)); } diff --git a/src/main/java/com/adobe/epubcheck/opf/XRefChecker.java b/src/main/java/com/adobe/epubcheck/opf/XRefChecker.java index 1820f402b..655478a2a 100755 --- a/src/main/java/com/adobe/epubcheck/opf/XRefChecker.java +++ b/src/main/java/com/adobe/epubcheck/opf/XRefChecker.java @@ -31,8 +31,9 @@ import java.util.Map; import java.util.Queue; import java.util.Set; -import java.util.regex.Pattern; +import org.w3c.epubcheck.constants.MIMEType; +import org.w3c.epubcheck.url.URLFragment; import org.w3c.epubcheck.url.URLUtils; import com.adobe.epubcheck.api.EPUBLocation; @@ -126,6 +127,7 @@ public static final class Builder private OPFItem item = null; private boolean hasItemFallback = false; private boolean hasImageFallback = false; + public String mimetype; public Builder url(URL url) { @@ -137,6 +139,13 @@ public Builder item(OPFItem item) { this.url = item.getURL(); this.item = item; + this.mimetype = item.getMimeType(); + return this; + } + + public Builder mimetype(String mimetype) + { + this.mimetype = mimetype; return this; } 
@@ -231,8 +240,6 @@ public boolean isInSpine() } } - private static final Pattern REGEX_SVG_VIEW = Pattern.compile("svgView\\(.*\\)"); - private final Map resources = new HashMap(); private final Set undeclared = new HashSet(); @@ -281,7 +288,7 @@ public Optional getResource(URL url) * @param path * the path to a publication resource * @return an immutable {@link EnumSet} containing the types of references to - * {@code path}. + * {@code path}. */ public Set getTypes(URL resource) { @@ -413,9 +420,15 @@ public void checkReferences() private void checkReference(URLReference reference) { Resource hostResource = resources.get(reference.location.url); - Resource targetResource = resources.get(reference.targetDoc); + + // Retrieve the Resource instance representing the targeted document // If the resource was not declared in the manifest, // we build a new Resource object for the data URL. + Resource targetResource = resources.get(reference.targetDoc); + String targetMimetype = (targetResource != null) ? 
targetResource.getMimeType() : ""; + + // Parse the URL fragment + URLFragment fragment = URLFragment.parse(reference.url, targetMimetype); // Check remote resources if (container.isRemote(reference.url) @@ -470,15 +483,18 @@ else if (!undeclared.contains(reference.targetDoc) return; } - String mimetype = targetResource.getMimeType(); - // Type-specific checks switch (reference.type) { case HYPERLINK: + if ("epubcfi".equals(fragment.getScheme())) + { + break; // EPUB CFI is not supported + } // if mimeType is null, we should have reported an error already - if (!OPFChecker.isBlessedItemType(mimetype, version) - && !OPFChecker.isDeprecatedBlessedItemType(mimetype) && !targetResource.hasItemFallback()) + if (!OPFChecker.isBlessedItemType(targetMimetype, version) + && !OPFChecker.isDeprecatedBlessedItemType(targetMimetype) + && !targetResource.hasItemFallback()) { report.message(MessageId.RSC_010, reference.location.context(container.relativize(reference.url))); @@ -494,31 +510,35 @@ else if (!undeclared.contains(reference.targetDoc) case IMAGE: case PICTURE_SOURCE: case PICTURE_SOURCE_FOREIGN: - if (reference.url.fragment() != null && !mimetype.equals("image/svg+xml")) + if ("epubcfi".equals(fragment.getScheme())) + { + break; // EPUB CFI is not supported + } + if (fragment.exists() && !MIMEType.SVG.is(targetMimetype)) { report.message(MessageId.RSC_009, reference.location.context(container.relativize(reference.url))); return; } // if mimeType is null, we should have reported an error already - if (!OPFChecker.isBlessedImageType(mimetype, version)) + if (!OPFChecker.isBlessedImageType(targetMimetype, version)) { if (version == EPUBVersion.VERSION_3 && reference.type == Type.PICTURE_SOURCE) { report.message(MessageId.MED_007, reference.location, - container.relativize(reference.targetDoc), mimetype); + container.relativize(reference.targetDoc), targetMimetype); return; } else if (reference.type == Type.IMAGE && !targetResource.hasImageFallback()) { 
report.message(MessageId.MED_003, reference.location, - container.relativize(reference.targetDoc), mimetype); + container.relativize(reference.targetDoc), targetMimetype); } } break; case SEARCH_KEY: // TODO update when we support EPUB CFI - if ((reference.url.fragment() == null || !reference.url.fragment().startsWith("epubcfi(")) + if ((!fragment.exists() || !"epubcfi".equals(fragment.getScheme())) && !targetResource.isInSpine()) { report.message(MessageId.RSC_021, reference.location, @@ -527,7 +547,7 @@ else if (reference.type == Type.IMAGE && !targetResource.hasImageFallback()) } break; case STYLESHEET: - if (reference.url.fragment() != null) + if (fragment.exists()) { report.message(MessageId.RSC_013, reference.location.context(container.relativize(reference.url))); @@ -551,7 +571,7 @@ else if (reference.type == Type.IMAGE && !targetResource.hasImageFallback()) case SVG_CLIP_PATH: case SVG_PAINT: case SVG_SYMBOL: - if (reference.url.fragment() == null) + if (!fragment.exists()) { report.message(MessageId.RSC_015, reference.location.context(reference.url)); return; @@ -562,32 +582,32 @@ else if (reference.type == Type.IMAGE && !targetResource.hasImageFallback()) } // Fragment integrity checks - String fragment = reference.url.fragment(); - if (fragment != null && !fragment.isEmpty()) + if (fragment.exists() && !fragment.isEmpty()) { // EPUB CFI - if (fragment.startsWith("epubcfi(")) + if ("epubcfi".equals(fragment.getScheme())) { + // FIXME HOT should warn if in MO // FIXME epubcfi currently not supported (see issue 150). 
return; } // Media fragments in Data Navigation Documents - else if (fragment.contains("=") && hostResource != null && hostResource.hasItem() + else if (fragment.isMediaFragment() && hostResource != null && hostResource.hasItem() && hostResource.getItem().getProperties() .contains(PackageVocabs.ITEM_VOCAB.get(PackageVocabs.ITEM_PROPERTIES.DATA_NAV))) { // Ignore, return; } - // SVG view fragments are ignored - else if (mimetype.equals("image/svg+xml") && REGEX_SVG_VIEW.matcher(fragment).matches()) + // Non-ID-based fragments are ignored + else if (fragment.getId().isEmpty()) { return; } // Fragment Identifier (by default) else if (!container.isRemote(reference.targetDoc)) { - ID anchor = targetResource.ids.get(fragment); + ID anchor = targetResource.ids.get(fragment.getId()); if (anchor == null) { report.message(MessageId.RSC_012, reference.location.context(reference.url.toString())); @@ -674,7 +694,8 @@ private void checkReadingOrder(Queue references, int lastSpinePosi } // check that the fragment is in document order - int targetAnchorPosition = res.getIDPosition(ref.url.fragment()); + URLFragment fragment = URLFragment.parse(ref.url, res.getMimeType()); + int targetAnchorPosition = res.getIDPosition(fragment.getId()); if (targetAnchorPosition < lastAnchorPosition) { String orderContext = LocalizedMessages.getInstance(locale).getSuggestion(MessageId.NAV_011, diff --git a/src/main/java/com/adobe/epubcheck/ops/OPSHandler.java b/src/main/java/com/adobe/epubcheck/ops/OPSHandler.java index 7ce928429..ad3993eaa 100755 --- a/src/main/java/com/adobe/epubcheck/ops/OPSHandler.java +++ b/src/main/java/com/adobe/epubcheck/ops/OPSHandler.java @@ -141,11 +141,6 @@ else if (".".equals(href)) // If the URL was not properly parsed, return early if (url == null) return; - // If the URL is an EPUB CFI, return (not implemented) - if (url.fragment() != null && url.fragment().matches("epubcfi\\(.*\\)")) - { - return; // temp until cfi implemented - } if ("file".equals(url.scheme())) { 
diff --git a/src/main/java/org/w3c/epubcheck/constants/MIMEType.java b/src/main/java/org/w3c/epubcheck/constants/MIMEType.java index 29dc92e2b..ced86f640 100644 --- a/src/main/java/org/w3c/epubcheck/constants/MIMEType.java +++ b/src/main/java/org/w3c/epubcheck/constants/MIMEType.java @@ -54,6 +54,6 @@ public boolean is(String string) public static MIMEType get(String name) { - return ENUM_MAP.getOrDefault(name.toLowerCase(Locale.ROOT), OTHER); + return (name != null) ? ENUM_MAP.getOrDefault(name.toLowerCase(Locale.ROOT), OTHER) : OTHER; } } diff --git a/src/main/java/org/w3c/epubcheck/url/URLFragment.java b/src/main/java/org/w3c/epubcheck/url/URLFragment.java new file mode 100644 index 000000000..6baf498ee --- /dev/null +++ b/src/main/java/org/w3c/epubcheck/url/URLFragment.java @@ -0,0 +1,410 @@ +package org.w3c.epubcheck.url; + +import java.util.Iterator; +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.w3c.epubcheck.constants.MIMEType; + +import com.google.common.base.Splitter; +import com.google.common.base.Strings; + +import io.mola.galimatias.URL; +import io.mola.galimatias.URLUtils; +import net.sf.saxon.om.NameChecker; + +/** + * Represents a URL fragment, after parsing micro-syntaxes. 
+ */ +public class URLFragment +{ + + /** + * Represents a non-existent fragment, for which {@link #exists()} returns + * false + */ + public static final URLFragment NONE = new URLFragment(new Parser().parse(null, null)); + + private final String fragment; + private final String scheme; + private final String id; + private final boolean isMediaFragment; + private final boolean isValid; + + private URLFragment(Parser parser) + { + this.fragment = parser.fragment; + this.id = Strings.nullToEmpty(parser.id); + this.scheme = Strings.nullToEmpty(parser.scheme); + this.isMediaFragment = parser.isMediaFragment; + this.isValid = parser.isValid; + } + + /** + * Returns the element ID represented by this fragment if this is an ID-based + * fragment, or the empty string otherwise. + * + * @return an element ID or the empty string. + */ + public String getId() + { + return id; + } + + /** + * Returns the scheme represented by this fragment if this is an scheme-based + * fragment, or the empty string otherwise. + * + * @return a scheme name or the empty string. + */ + public String getScheme() + { + return scheme; + } + + /** + * @return true iff the URL from which this was parsed had a + * fragment. + */ + public boolean exists() + { + return fragment != null; + } + + /** + * @return true iff this fragment is the empty string or + * represents a non-existent fragment. + */ + public boolean isEmpty() + { + return fragment == null || fragment.isEmpty(); + } + + /** + * @return true iff this fragment is valid according to its + * target MIME type. + */ + public boolean isValid() + { + return isValid; + } + + /** + * @return true iff this fragment is a media fragment. + */ + public boolean isMediaFragment() + { + return isMediaFragment; + } + + @Override + /** + * @return the full fragment string. 
+ */ + public String toString() + { + return fragment; + } + + @Override + public int hashCode() + { + return Objects.hash(fragment); + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; + URLFragment other = (URLFragment) obj; + return Objects.equals(fragment, other.fragment); + } + + /** + * Parse the fragment of the given URL, according to the rules defined for the + * given MIME type. + * + * If the URL has no fragment, returns {@link #NONE} + * + *

HTML types "application/xhtml+xml" and "text/html"

+ * + *

+ * The following fragment patterns are supported: + *

+ * + *
    + *
  • regular ID-based fragments (`#name`)
  • + *
  • scheme-based fragments (`#name(something)`)
  • + *
  • media fragments (`#name=value`, with name one of + * `t|xywh|track|id|xyn|xyr`)
  • + *
  • fragment directives (`#name:~:text=range`)
  • + *
+ * + *

+ * Note that this deviates from the HTML standard in the following ways: + *

+ * + *
    + *
  • HTML does not define specific logic for scheme-based or media + * fragments, which must be treated like any other IDs. However, EPUB makes + * use of them notably for EPUB CFI or region-based navigation.
  • + *
  • Fragment directives (as used in text fragments) are an incubating + * standard (at the time of writing) and are likely not well supported by + * reading systems, but their syntax is specific enough to lower the risk of + * false positives.
  • + *
+ * + *

SVG type "image/svg+xml"

+ * + *

+ * The following fragment patterns are supported: + *

+ * + *
    + *
  • shorthand bare form names (#name). Validation checks that + * the name is an XML NCName.
  • + *
  • SVG view specification (#svgView(…)). Validation currently + * does not look into the content of the parentheses.
  • + *
  • basic media fragments (#xywh=0,0,50,50). Validation checks + * the syntax of spatial and temporal dimensions.
  • + *
+ * + *

Other type

+ * + *

+ * Any other type is assumed to be XML. The following fragment patterns are + * supported: + *

+ * + *
    + *
  • shorthand bare form names (#name). Validation checks that + * the name is an XML NCName.
  • + *
  • scheme-based fragments (`#name(something)`). No validation of the + * scheme name or syntax.
  • + *
+ * + * @param url + * a URL + * @param mimetype + * the MIME type of the URL target + * @return a parsed fragment (cannot be null) + */ + public static URLFragment parse(URL url, String mimetype) + { + if (url == null || url.fragment() == null) + { + return NONE; + } + else + { + return new URLFragment(new Parser().parse(url.fragment(), mimetype)); + } + } + + /** + * Parse the fragment of the given URL, according to the default rules (XML + * MIME type), see {@link URLFragment#parse(URL, String)}. + * + * @param url + * a URL + * @return a parsed fragment (cannot benull) + */ + public static URLFragment parse(URL url) + { + return parse(url, ""); + } + + private static final class Parser + { + private String fragment; + private String scheme; + private String id; + private boolean isMediaFragment = false; + private boolean isValid = true; + + /* + * Parse the fragment, by dispatching to a type-specific method. + * + * Note (2022): parsing would likely be more efficient if implemented as a + * state parser instead of using regex-based string matching. 
+ */ + private Parser parse(String fragment, String mimetype) + { + this.fragment = fragment; + if (fragment != null) + { + switch (MIMEType.get(mimetype)) + { + case SVG: + parseSVGFragment(fragment); + break; + case HTML: + case XHTML: + parseHTMLFragment(fragment); + break; + default: + parseXMLFragment(fragment); + break; + } + } + return this; + } + + private static final Pattern SCHEME_BASED = Pattern.compile("(\\w+)\\(.*\\)"); + private static final Pattern MEDIA_FRAGMENT = Pattern + .compile("(t|xywh|track|id|xyn|xyr)=[^&]+(&[^&=]+=[^&]+)*"); + + // Parses an XML fragment identifier + private void parseXMLFragment(String fragment) + { + Matcher matcher; + // Schema based + if ((matcher = SCHEME_BASED.matcher(fragment)).matches()) + { + this.scheme = matcher.group(1); + } + // ID fragment + else + { + this.id = URLUtils.percentDecode(fragment); + this.isValid = NameChecker.isValidNCName(id); + } + } + + /* + * Parses an HTML fragment identifier + */ + private void parseHTMLFragment(String fragment) + { + Matcher matcher; + // strip fragment directive + // see https://wicg.github.io/scroll-to-text-fragment/ + int index; + if ((index = fragment.indexOf(":~:")) > -1) + { + fragment = fragment.substring(0, index); + } + // scheme-based fragment + if ((matcher = SCHEME_BASED.matcher(fragment)).matches()) + { + this.scheme = matcher.group(1); + } + // media fragment + else if ((matcher = MEDIA_FRAGMENT.matcher(fragment)).matches()) + { + this.isMediaFragment = true; + } + // ID fragment + else + { + this.id = URLUtils.percentDecode(fragment); + } + } + + /* + * Parses an SVG fragment identifier, see: + * https://www.w3.org/TR/SVG/linking.html#SVGFragmentIdentifiersDefinitions + */ + private void parseSVGFragment(String fragment) + { + + if (fragment.isEmpty()) return; + + // Split the fragment into &-separated components + Iterator components = Splitter.on('&').split(fragment).iterator(); + String first = components.next(); + + // SVG view specification + if 
(first.startsWith("svgView(")) + { + // check the SVG view is well-formed + isValid = parseSVGView(first); + // check optional remaining components are well-formed time segments + while (isValid && components.hasNext()) + { + isValid = parseTimeSegment(components.next()); + } + } + // Temporal media fragment + else if (first.startsWith("t=")) + { + isMediaFragment = true; + // check the first component is a well-formed time segment + isValid = parseTimeSegment(first); + // check optional remaining components are well-formed space segments + while (isValid && components.hasNext()) + { + isValid = parseSpaceSegment(components.next()); + } + } + // Spatial media fragment + else if (first.startsWith("xywh=")) + { + isMediaFragment = true; + // check the first component is a well-formed space segment + isValid = parseSpaceSegment(first); + // check optional remaining components are well-formed time segments + while (isValid && components.hasNext()) + { + isValid = parseTimeSegment(components.next()); + } + } + else if (first.contains("=")) + { + isValid = false; + } + // Shorthand bare name + else + { + // Record the ID, percent-decoded + this.id = URLUtils.percentDecode(first); + // check validity of the ID + this.isValid = NameChecker.isValidNCName(id); + // check optional remaining components are well-formed time segments + while (isValid && components.hasNext()) + { + isValid = parseTimeSegment(components.next()); + } + } + } + + private static final Pattern SVGVIEW = Pattern.compile("svgView\\(.+\\)"); + + private boolean parseSVGView(String string) + { + return isValid = SVGVIEW.matcher(string).matches(); + } + + private static final Pattern SPATIAL = Pattern + .compile("xywh=(pixel:|percent:)?\\d+,\\d+,\\d+,\\d+"); + + private boolean parseSpaceSegment(String string) + { + return isValid = SPATIAL.matcher(string).matches(); + } + + private static final Pattern TEMPORAL = Pattern + .compile("t=(?:npt:)?(?:([0-9.:]+)(?:,([0-9.:]+))?|,([0-9.:]+))"); + private static 
final Pattern NPTTIME = Pattern + .compile("((\\d+)|([0-5]\\d:[0-5]\\d)|(\\d+:[0-5]\\d:[0-5]\\d))(\\.\\d*)?"); + + private boolean parseTimeSegment(String string) + { + Matcher matcher = TEMPORAL.matcher(string); + if (isValid = matcher.matches()) + { + int i = 1; + while (isValid && i <= matcher.groupCount()) + { + isValid = matcher.group(i) == null || NPTTIME.matcher(matcher.group(i)).matches(); + i++; + } + } + return isValid; + } + } + +} diff --git a/src/test/java/org/w3c/epubcheck/url/URLFragmentSteps.java b/src/test/java/org/w3c/epubcheck/url/URLFragmentSteps.java new file mode 100644 index 000000000..4f62a0b74 --- /dev/null +++ b/src/test/java/org/w3c/epubcheck/url/URLFragmentSteps.java @@ -0,0 +1,67 @@ +package org.w3c.epubcheck.url; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.emptyString; +import static org.hamcrest.Matchers.is; +import static org.junit.Assert.assertTrue; + +import java.net.URI; + +import org.w3c.epubcheck.constants.MIMEType; + +import com.google.common.base.Enums; + +import io.cucumber.java.en.Then; +import io.mola.galimatias.GalimatiasParseException; +import io.mola.galimatias.URL; + +public class URLFragmentSteps +{ + + private static final URL BASE_URL = URL.fromJavaURI(URI.create("https://example.org")); + + private URLFragment result; + + @Then("{string} is a {} {} fragment") + public void testSVGFragment(String fragment, String validity, String type) + { + result = parse(fragment, + Enums.getIfPresent(MIMEType.class, type).or(MIMEType.OTHER).toString()); + assertThat((result.isValid()) ? 
"valid" : "invalid", is(validity)); + } + + @Then("it indicates an element with ID {string}") + public void assertID(String id) + { + assertThat(result.getId(), is(id)); + } + + @Then("it does not indicate an element") + public void assertIDIsEmpty() + { + assertThat(result.getId(), is(emptyString())); + } + + @Then("it is a media fragment") + public void assertMediaFragment() + { + assertTrue(result.isMediaFragment()); + } + + @Then("it has scheme {string}") + public void assertScheme(String scheme) + { + assertThat(result.getScheme(), is(scheme)); + } + + private URLFragment parse(String fragment, String mimetype) + { + try + { + return URLFragment.parse(BASE_URL.withFragment(fragment), mimetype); + } catch (GalimatiasParseException e) + { + throw new AssertionError("Could not create URL with fragment " + fragment, e); + } + } +} diff --git a/src/test/resources/epub-previews/files/epub/preview-embedded-link-cfi-error/EPUB/package.opf b/src/test/resources/epub-previews/files/epub/preview-embedded-link-cfi-error/EPUB/package.opf index 07be897c3..084ecc944 100644 --- a/src/test/resources/epub-previews/files/epub/preview-embedded-link-cfi-error/EPUB/package.opf +++ b/src/test/resources/epub-previews/files/epub/preview-embedded-link-cfi-error/EPUB/package.opf @@ -24,6 +24,6 @@ - + \ No newline at end of file diff --git a/src/test/resources/unit-tests/url-fragment.feature b/src/test/resources/unit-tests/url-fragment.feature new file mode 100644 index 000000000..aba0e8712 --- /dev/null +++ b/src/test/resources/unit-tests/url-fragment.feature @@ -0,0 +1,105 @@ +Feature: URL fragment parser + + Tests the parser for URL fragments + + Scenario Outline: HTML ID-based fragment + * is a valid HTML fragment + And it indicates an element with ID + + Scenarios: + | fragment | id | + | "id" | "id" | + | "%40%40" | "@@" | + | "id:~:text=a,b" | "id" | + + Scenarios: Text fragments (experimental) + | fragment | id | + | "id:~:text=a,b" | "id" | + | ":~:text=a,b" | "" | + + Scenarios: 
"invalid" non-ID-based fragments are processed as IDs + | fragment | id | + | "foo=bar" | "foo=bar" | + | "epubcfi(" | "epubcfi(" | + + + Scenario Outline: HTML scheme-based fragment + * is a valid HTML fragment + And it has scheme + And it does not indicate an element + + Scenarios: + | fragment | scheme | + | "xpointer(id(foo))" | "xpointer" | + | "epubcfi(/6/4[chap01ref]!/4[body01])" | "epubcfi" | + + Scenario Outline: HTML media fragment + * is a valid HTML fragment + And it is a media fragment + And it does not indicate an element + + Scenarios: + | fragment | + | "xywh=1,1,1,1" | + | "t=10" | + | "track=audio" | + | "id=foo" | + + Scenario Outline: SVG shorthand fragment + * is a SVG fragment + And it indicates an element with ID + + Scenarios: Shorthand fragments + + | fragment | validity | id | + | "id" | valid | "id" | + | "id&t=10" | valid | "id" | + | "id&t=10&t=5" | valid | "id" | + | "id&foo=bar" | invalid | "id" | + | "id&t=" | invalid | "id" | + | "id&" | invalid | "id" | + | "*id" | invalid | "*id" | (not an NCName) + | "%40%40" | invalid | "@@" | (not an NCName) + + + Scenario Outline: SVG media fragment + * is a SVG fragment + + Scenarios: Temporal media fragment + | fragment | validity | + | "t=npt:10,20" | valid | + | "t=npt:,121.5" | valid | + | "t=0:02:00,121.5" | valid | + | "t=npt:120,0:02:01." 
| valid | + | "t=60:00" | invalid | + | "t=00:99" | invalid | + | "t=123:00:00" | valid | + | "t=10&xywh=0,0,1,1" | valid | + + Scenarios: Spatial media fragment + | fragment | validity | + | "xywh=160,120,320,240" | valid | + | "xywh=pixel:160,120,320,240" | valid | + | "xywh=percent:25,25,50,50" | valid | + | "xywh=160,120,320" | invalid | + | "xywh=px:160,120,320,240" | invalid | + + Scenarios: SVG view specification + | fragment | validity | + | "svgView(viewBox(0,0,200,200))" | valid | + | "svgView(preserveAspectRatio(xMidYMid))" | valid | + | "svgView(transform(scale(5))" | valid | + | "svgView()" | invalid | + | "svgView(viewBox(0,0,200,200" | invalid | + + Scenario Outline: SVG invalid fragments + Should not be parsed as legit IDs + * is a SVG fragment + And it indicates an element with ID + + Scenarios: Unknown or invalid media fragments + | fragment | validity | id | + | "foo=bar" | invalid | "" | + | "foo=" | invalid | "" | + | "=foo" | invalid | "" | +