Skip to content

Commit

Permalink
🐛 Escape annotated HTML tags in span renderer (#12817)
Browse files Browse the repository at this point in the history
These changes add a missing call to `escape_html` in the displaCy span
renderer. Previously span-annotated tokens would be inserted into the
page markup without being escaped, resulting in potentially incorrect
rendering. When I encountered this issue, it resulted in some docs and
span underlines being superimposed on top of properly rendered docs and
span underlines near the beginning of the visualization (due to an
unescaped `<span>` tag).
  • Loading branch information
connorbrinton authored Jul 13, 2023
1 parent ddffd09 commit 0566c3a
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
3 changes: 1 addition & 2 deletions spacy/displacy/render.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
- import itertools
import uuid
from typing import Any, Dict, List, Optional, Tuple, Union

Expand Down Expand Up @@ -218,7 +217,7 @@ def _render_markup(self, per_token_info: List[Dict[str, Any]]) -> str:
+ (self.offset_step * (len(entities) - 1))
)
markup += self.span_template.format(
- text=token["text"],
+ text=escape_html(token["text"]),
span_slices=slices,
span_starts=starts,
total_height=total_height,
Expand Down
19 changes: 19 additions & 0 deletions spacy/tests/test_displacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,22 @@ def test_displacy_manual_sorted_entities():

html = displacy.render(doc, style="ent", manual=True)
assert html.find("FIRST") < html.find("SECOND")


@pytest.mark.issue(12816)
def test_issue12816(en_vocab) -> None:
    """Regression test: the displaCy span renderer must HTML-escape token text.

    The token "<TEST>" is rendered twice, first outside any span and then
    inside one; the escaped form has to show up in the markup both times.
    """
    escaped_tag = "&lt;TEST&gt;"

    # Two-token doc where only the first (plain) word is covered by a span
    doc = Doc(en_vocab, words=["test", "<TEST>"])
    doc.spans["sc"] = [Span(doc, 0, 1, label="test")]

    # Unannotated token: the tag-like text must come out escaped
    assert escaped_tag in displacy.render(doc, style="span")

    # Put the tag-like token inside a span as well
    doc.spans["sc"].append(Span(doc, 1, 2, label="test"))

    # Annotated token: the escaped form must still be present
    assert escaped_tag in displacy.render(doc, style="span")

0 comments on commit 0566c3a

Please sign in to comment.