Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rustdoc: improve rustdoc HTML suggestions handling of nested generics #101731

Merged
merged 1 commit into from
Sep 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 80 additions & 3 deletions src/librustdoc/passes/html_tags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,34 @@ fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> {
if current_pos == end_pos { None } else { Some(current_pos) }
}

fn extract_path_forward(text: &str, start_pos: usize) -> Option<usize> {
use rustc_lexer::{is_id_continue, is_id_start};
let mut current_pos = start_pos;
loop {
if current_pos < text.len() && text[current_pos..].starts_with("::") {
current_pos += 2;
} else {
break;
}
let mut chars = text[current_pos..].chars();
if let Some(c) = chars.next() {
if is_id_start(c) {
current_pos += c.len_utf8();
} else {
break;
}
}
while let Some(c) = chars.next() {
if is_id_continue(c) {
current_pos += c.len_utf8();
} else {
break;
}
}
}
if current_pos == start_pos { None } else { Some(current_pos) }
}

fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool {
// https://spec.commonmark.org/0.30/#raw-html
//
Expand Down Expand Up @@ -218,19 +246,68 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
// If a tag looks like `<this>`, it might actually be a generic.
// We don't try to detect stuff `<like, this>` because that's not valid HTML,
// and we don't try to detect stuff `<like this>` because that's not valid Rust.
if let Some(Some(generics_start)) = (is_open_tag
&& dox[..range.end].ends_with('>'))
let mut generics_end = range.end;
if let Some(Some(mut generics_start)) = (is_open_tag
&& dox[..generics_end].ends_with('>'))
.then(|| extract_path_backwards(&dox, range.start))
{
while generics_start != 0
&& generics_end < dox.len()
&& dox.as_bytes()[generics_start - 1] == b'<'
&& dox.as_bytes()[generics_end] == b'>'
{
generics_end += 1;
generics_start -= 1;
if let Some(new_start) = extract_path_backwards(&dox, generics_start) {
generics_start = new_start;
}
if let Some(new_end) = extract_path_forward(&dox, generics_end) {
generics_end = new_end;
}
}
if let Some(new_end) = extract_path_forward(&dox, generics_end) {
generics_end = new_end;
}
let generics_sp = match super::source_span_for_markdown_range(
tcx,
&dox,
&(generics_start..range.end),
&(generics_start..generics_end),
&item.attrs,
) {
Some(sp) => sp,
None => item.attr_span(tcx),
};
// Sometimes, we only extract part of a path. For example, consider this:
//
// <[u32] as IntoIter<u32>>::Item
// ^^^^^ unclosed HTML tag `u32`
//
// We don't have any code for parsing fully-qualified trait paths.
// In theory, we could add it, but doing it correctly would require
// parsing the entire path grammar, which is problematic because of
// overlap between the path grammar and Markdown.
//
// The example above shows that ambiguity. Is `[u32]` intended to be an
// intra-doc link to the u32 primitive, or is it intended to be a slice?
//
// If the below conditional were removed, we would suggest this, which is
// not what the user probably wants.
//
// <[u32] as `IntoIter<u32>`>::Item
//
// We know that the user actually wants to wrap the whole thing in a code
// block, but the only reason we know that is because `u32` does not, in
// fact, implement IntoIter. If the example looks like this:
//
// <[Vec<i32>] as IntoIter<i32>::Item
//
// The ideal fix would be significantly different.
if (generics_start > 0 && dox.as_bytes()[generics_start - 1] == b'<')
|| (generics_end < dox.len() && dox.as_bytes()[generics_end] == b'>')
{
diag.emit();
return;
}
// multipart form is chosen here because ``Vec<i32>`` would be confusing.
diag.multipart_suggestion(
"try marking as source code",
Expand Down
42 changes: 42 additions & 0 deletions src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,48 @@ pub struct ConstGeneric;
// HTML tags cannot contain commas, so no error.
pub struct MultipleGenerics;

/// This <[u32] as Iterator<Item>> thing!
//~^ERROR unclosed HTML tag `Item`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
//
// The important part is that we don't produce any *wrong* suggestions.
// While several other examples below are added to make sure we don't
// produce suggestions when given complex paths, this example is the actual
// reason behind not just using the real path parser. It's ambiguous: there's
// no way to locally reason out whether that `[u32]` is intended to be a slice
// or an intra-doc link.
pub struct FullyQualifiedPathsDoNotCount;

/// This <Vec as IntoIter>::Iter thing!
//~^ERROR unclosed HTML tag `Vec`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount1;

/// This Vec<Vec as IntoIter>::Iter thing!
//~^ERROR unclosed HTML tag `Vec`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount2;

/// This Vec<Vec as IntoIter> thing!
//~^ERROR unclosed HTML tag `Vec`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount3;

/// This Vec<Vec<i32> as IntoIter> thing!
//~^ERROR unclosed HTML tag `i32`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount4;

/// This Vec<i32 class="test"> thing!
//~^ERROR unclosed HTML tag `i32`
// HTML attributes shouldn't be treated as Rust syntax, so no suggestions.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,38 +1,68 @@
error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:11:13
error: unclosed HTML tag `Item`
--> $DIR/html-as-generics-no-suggestions.rs:11:28
|
LL | /// This Vec<i32 class="test"> thing!
| ^^^^
LL | /// This <[u32] as Iterator<Item>> thing!
| ^^^^^^
|
note: the lint level is defined here
--> $DIR/html-as-generics-no-suggestions.rs:1:9
|
LL | #![deny(rustdoc::invalid_html_tags)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^

error: unclosed HTML tag `Vec`
--> $DIR/html-as-generics-no-suggestions.rs:25:10
|
LL | /// This <Vec as IntoIter>::Iter thing!
| ^^^^

error: unclosed HTML tag `Vec`
--> $DIR/html-as-generics-no-suggestions.rs:32:13
|
LL | /// This Vec<Vec as IntoIter>::Iter thing!
| ^^^^

error: unclosed HTML tag `Vec`
--> $DIR/html-as-generics-no-suggestions.rs:39:13
|
LL | /// This Vec<Vec as IntoIter> thing!
| ^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:46:17
|
LL | /// This Vec<Vec<i32> as IntoIter> thing!
| ^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:53:13
|
LL | /// This Vec<i32 class="test"> thing!
| ^^^^

error: unopened HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:20:13
--> $DIR/html-as-generics-no-suggestions.rs:62:13
|
LL | /// This Vec</i32> thing!
| ^^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:25:13
--> $DIR/html-as-generics-no-suggestions.rs:67:13
|
LL | /// This 123<i32> thing!
| ^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:30:14
--> $DIR/html-as-generics-no-suggestions.rs:72:14
|
LL | /// This Vec:<i32> thing!
| ^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:35:39
--> $DIR/html-as-generics-no-suggestions.rs:77:39
|
LL | /// This [link](https://rust-lang.org)<i32> thing!
| ^^^^^

error: aborting due to 5 previous errors
error: aborting due to 10 previous errors

40 changes: 40 additions & 0 deletions src/test/rustdoc-ui/suggestions/html-as-generics.fixed
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,43 @@ pub struct BareTurbofish;
//~^ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct Nested;

/// Nested generics `Vec<Vec<u32>>`
//~^ ERROR unclosed HTML tag `u32`
//~|HELP try marking as source
pub struct NestedGenerics;

/// Generics with path `Vec<i32>::Iter`
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct GenericsWithPath;

/// Generics with path `<Vec<i32>>::Iter`
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath;

/// Generics with path `Vec<Vec<i32>>::Iter`
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath2;

/// Generics with bump `<Vec<i32>>`s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump;

/// Generics with bump `Vec<Vec<i32>>`s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump2;

/// Generics with punct `<Vec<i32>>`!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct;

/// Generics with punct `Vec<Vec<i32>>`!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct2;
40 changes: 40 additions & 0 deletions src/test/rustdoc-ui/suggestions/html-as-generics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,43 @@ pub struct BareTurbofish;
//~^ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct Nested;

/// Nested generics Vec<Vec<u32>>
//~^ ERROR unclosed HTML tag `u32`
//~|HELP try marking as source
pub struct NestedGenerics;

/// Generics with path Vec<i32>::Iter
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct GenericsWithPath;

/// Generics with path <Vec<i32>>::Iter
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath;

/// Generics with path Vec<Vec<i32>>::Iter
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath2;

/// Generics with bump <Vec<i32>>s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump;

/// Generics with bump Vec<Vec<i32>>s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump2;

/// Generics with punct <Vec<i32>>!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct;

/// Generics with punct Vec<Vec<i32>>!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct2;
Loading