Skip to content

Commit

Permalink
Pass arguments to parser modules (#446)
Browse the repository at this point in the history
* Add optional arguments to pass to the parser module when parsing documents and fragments

* Add tests for parse_document and parse_fragment with arguments

---------

Co-authored-by: Rick Littel <[email protected]>
  • Loading branch information
Kuret and Rick Littel authored Feb 14, 2023
1 parent da1db15 commit f3499c6
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 12 deletions.
12 changes: 8 additions & 4 deletions lib/floki/html_parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,19 @@ defmodule Floki.HTMLParser do

@default_parser Floki.HTMLParser.Mochiweb

@callback parse_document(binary()) :: {:ok, Floki.html_tree()} | {:error, String.t()}
@callback parse_fragment(binary()) :: {:ok, Floki.html_tree()} | {:error, String.t()}
@callback parse_document(binary(), list()) :: {:ok, Floki.html_tree()} | {:error, String.t()}
@callback parse_fragment(binary(), list()) :: {:ok, Floki.html_tree()} | {:error, String.t()}

def parse_document(html, opts \\ []) do
parser(opts).parse_document(html)
parser_args = opts[:parser_args] || []

parser(opts).parse_document(html, parser_args)
end

def parse_fragment(html, opts \\ []) do
parser(opts).parse_fragment(html)
parser_args = opts[:parser_args] || []

parser(opts).parse_fragment(html, parser_args)
end

defp parser(opts) do
Expand Down
8 changes: 4 additions & 4 deletions lib/floki/html_parser/fast_html.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ defmodule Floki.HTMLParser.FastHtml do
@moduledoc false

@impl true
def parse_document(html) do
execute_with_module(fn module -> module.decode(html) end)
def parse_document(html, args) do
execute_with_module(fn module -> module.decode(html, args) end)
end

@impl true
def parse_fragment(html) do
execute_with_module(fn module -> module.decode_fragment(html) end)
def parse_fragment(html, args) do
execute_with_module(fn module -> module.decode_fragment(html, args) end)
end

defp execute_with_module(fun) do
Expand Down
4 changes: 2 additions & 2 deletions lib/floki/html_parser/html5ever.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ defmodule Floki.HTMLParser.Html5ever do
@moduledoc false

@impl true
def parse_document(html) do
def parse_document(html, _args) do
case Code.ensure_loaded(Html5ever) do
{:module, module} ->
case module.parse(html) do
Expand All @@ -22,5 +22,5 @@ defmodule Floki.HTMLParser.Html5ever do

# NOTE: html5ever does not implement parse_fragment yet.
@impl true
def parse_fragment(html), do: parse_document(html)
def parse_fragment(html, args), do: parse_document(html, args)
end
4 changes: 2 additions & 2 deletions lib/floki/html_parser/mochiweb.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ defmodule Floki.HTMLParser.Mochiweb do
@root_node "floki"

@impl true
def parse_document(html) do
def parse_document(html, _args) do
html = "<#{@root_node}>#{html}</#{@root_node}>"
{@root_node, [], parsed} = :floki_mochi_html.parse(html)

Expand All @@ -14,5 +14,5 @@ defmodule Floki.HTMLParser.Mochiweb do

# NOTE: mochi_html cannot distinguish between a fragment and a document.
@impl true
def parse_fragment(html), do: parse_document(html)
def parse_fragment(html, args), do: parse_document(html, args)
end
74 changes: 74 additions & 0 deletions test/floki_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,80 @@ defmodule FlokiTest do

Enum.each(@plain_text_tags, validate_html)
end

# Baseline case: with no parser arguments, every tag name in the tree is a string.
# NOTE(review): `only_parser` presumably limits this test to runs that use the
# FastHtml parser — confirm against the test helper configuration.
@tag only_parser: FastHtml
test "parses all elements as strings by default" do
  markup = html_body(~s(<div><p>Content</p><custom>Custom</custom></div>))

  div_node = {"div", [], [{"p", [], ["Content"]}, {"custom", [], ["Custom"]}]}
  expected = [{"html", [], [{"head", [], []}, {"body", [], [div_node]}]}]

  assert {:ok, ^expected} = Floki.parse_document(markup)
end

# Checks that `parser_args` are forwarded to the parser: with `format: [:html_atoms]`
# the expected tree uses atoms for html/head/body/div/p.
# NOTE(review): `only_parser` presumably limits this test to runs that use the
# FastHtml parser — confirm against the test helper configuration.
@tag only_parser: FastHtml
test "parses known elements as atoms when :html_atoms format argument is given" do
  markup = html_body(~s(<div><p>Content</p><custom>Custom</custom></div>))

  # The non-standard "custom" tag is expected to remain a string.
  div_node = {:div, [], [{:p, [], ["Content"]}, {"custom", [], ["Custom"]}]}
  expected = [{:html, [], [{:head, [], []}, {:body, [], [div_node]}]}]

  result = Floki.parse_document(markup, parser_args: [format: [:html_atoms]])

  assert {:ok, expected} == result
end
end

# Covers `Floki.parse_fragment/2` with and without `parser_args`.
# NOTE(review): `only_parser` presumably limits these tests to runs that use the
# FastHtml parser — confirm against the test helper configuration.
describe "parse_fragment/2" do
  # Without a context the expected result is only the concatenated cell text.
  @tag only_parser: FastHtml
  test "does not parse a table row with missing parent table tag by default" do
    row = "<tr><td>Column 1</td><td>Column 2</td></tr>"

    assert {:ok, ["Column 1Column 2"]} == Floki.parse_fragment(row)
  end

  # With `context: "table"` the row is expected back wrapped in a "tbody" node.
  @tag only_parser: FastHtml
  test "parses a table row with missing parent table tag when table context is given" do
    row = "<tr><td>1</td><td>2</td></tr>"

    cells = [{"td", [], ["1"]}, {"td", [], ["2"]}]
    expected = [{"tbody", [], [{"tr", [], cells}]}]

    assert {:ok, expected} == Floki.parse_fragment(row, parser_args: [context: "table"])
  end
end

# Floki.raw_html/2
Expand Down

0 comments on commit f3499c6

Please sign in to comment.