Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Markdown parser fix for contents with multibyte characters #365

Merged
merged 3 commits into from
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 33 additions & 32 deletions autoload/lsp/markdown.vim
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ var code_fence = '^ \{,3\}\(`\{3,\}\|\~\{3,\}\)\s*\(\S*\)'
var code_indent = '^ \{4\}\zs\s*\S.*'
var paragraph = '^\s*\zs\S.\{-}\s*\ze$'

var atx_heading = '^ \{,3}\zs\(#\{1,6}\) \(.\{-}\)\ze\%( #\{1,}\s*\)\=$'
var atx_heading = '^ \{,3}\zs\(#\{1,6}\) \s*\(.\{-}\)\s*\ze\%( #\{1,}\s*\)\=$'
var setext_heading = '^ \{,3}\zs\%(=\{1,}\|-\{1,}\)\ze *$'
var setext_heading_level = {"=": 1, "-": 2}

Expand Down Expand Up @@ -414,40 +414,41 @@ enddef

# Split one rendered line at its embedded "\n" characters.
#
# `line` is a dict with a `text` string and a `props` list; each prop is a
# dict with `type`, `col` and `length`.  The result is a list of dicts of the
# same shape, one per resulting line, with every prop assigned to (or divided
# between) the lines it covers.
#
# NOTE(review): this hunk is rendered without +/- markers, so two
# implementations are interleaved below.  The statements built around `pos`,
# `cur_line` and `next_line` look like the removed, recursive version; the
# statements built around `tokens` look like the added, loop-based version.
# Confirm against the real diff before relying on this attribution.
def SplitLine(line: dict<any>, indent: number = 0): list<dict<any>>
var lines: list<dict<any>> = []
# pos-based variant: locate the first newline only.
var pos = line.text->match('\n')
if pos < 0
# tokens-based variant: split on every newline at once; the `true` argument
# keeps empty items, so consecutive newlines yield empty lines.
var tokens: list<string> = line.text->split("\n", true)
if tokens->len() == 1
# No newline in the text: return the line unchanged as the only element.
lines->add(line)
return lines
endif
# pos-based variant: text before the first newline becomes `cur_line`, the
# remainder (prefixed with `indent` spaces) becomes `next_line`.
var cur_line: dict<any> = {
text: line.text[: pos - 1],
props: []
}
var next_line: dict<any> = {
text: (' '->repeat(indent) .. line.text[pos + 1 :]),
props: []
}
# pos-based variant: a prop goes entirely to cur_line, entirely to next_line
# (column shifted), or is cut into two new props at the newline.
for prop in line.props
if prop.col + prop.length - 1 < pos + 1
cur_line.props->add(prop)
elseif prop.col > pos + 1
prop.col -= pos - indent + 1
next_line.props->add(prop)
else
cur_line.props->add({
type: prop.type,
col: prop.col,
length: pos - prop.col + 1
})
next_line.props->add({
type: prop.type,
col: indent + 1,
length: prop.col + prop.length - pos - 2
})
endif
# tokens-based variant: walk the tokens in order, carrying the props that
# have not been placed yet from one token to the next.  Note that none of
# the tokens-based statements reference `indent`.
var props: list<dict<any>> = line.props
for cur_text in tokens
var cur_props: list<dict<any>> = []
var next_props: list<dict<any>> = []
# len() is measured on the token itself; presumably a byte count, matching
# byte-based prop columns -- confirm against the Vim documentation.
var length: number = cur_text->len()
for prop in props
if prop.col + prop.length - 1 <= length
# The prop ends inside this token: it stays on this line untouched.
cur_props->add(prop)
elseif prop.col > length
# The prop starts after this token: shift it left by the token length
# plus one (the newline) and defer it.  The prop dict is modified in place.
prop.col -= length + 1
next_props->add(prop)
else
# The prop straddles the line break: the head stays here as a new dict,
# the tail restarts at column 1 with the head and the newline removed.
var cur_length: number = length - prop.col + 1
cur_props->add({
type: prop.type,
col: prop.col,
length: cur_length
})
prop.col = 1
prop.length -= cur_length + 1
next_props->add(prop)
endif
endfor
lines->add({
text: cur_text,
props: cur_props
})
# Only the deferred props are considered for the next token.
props = next_props
endfor
# pos-based variant: emit the first line and recurse on the remainder.
lines->add(cur_line)
return lines + SplitLine(next_line, indent)
# tokens-based variant: every token has already been appended to `lines`.
return lines
enddef

var last_block: string = ''
Expand Down Expand Up @@ -599,7 +600,7 @@ def ExpandTabs(line: string): string
var begin: string = ""
for char in block_marker[0]
if char == ' '
begin ..= ' '->repeat(4 - (begin->strlen() % 4))
begin ..= ' '->repeat(4 - (begin->len() % 4))
else
begin ..= char
endif
Expand Down
102 changes: 98 additions & 4 deletions test/markdown_tests.vim
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,46 @@ def g:Test_Markdown()
[
'# First level heading',
'## Second level heading',
'### Third level heading'
'### Third level heading',
'# Heading with leading and trailing whitespaces ',
'Multiline setext heading ',
'of level 1',
'===',
'Multiline setext heading\',
'of level 2',
'---'
],
# Expected text
[
'First level heading',
'',
'Second level heading',
'',
'Third level heading'
'Third level heading',
'',
'Heading with leading and trailing whitespaces',
'',
'Multiline setext heading',
'of level 1',
'',
'Multiline setext heading',
'of level 2'
],
# Expected text properties
[
[{'col': 1, 'type': 'LspMarkdownHeading', 'length': 19}],
[],
[{'col': 1, 'type': 'LspMarkdownHeading', 'length': 20}],
[],
[{'col': 1, 'type': 'LspMarkdownHeading', 'length': 19}]
[{'col': 1, 'type': 'LspMarkdownHeading', 'length': 19}],
[],
[{'col': 1, 'type': 'LspMarkdownHeading', 'length': 45}],
[],
[{'col': 1, 'type': 'LspMarkdownHeading', 'length': 24}],
[{'col': 1, 'type': 'LspMarkdownHeading', 'length': 10}],
[],
[{'col': 1, 'type': 'LspMarkdownHeading', 'length': 24}],
[{'col': 1, 'type': 'LspMarkdownHeading', 'length': 10}],
]
],
[
Expand Down Expand Up @@ -173,6 +196,77 @@ def g:Test_Markdown()
[
[], [], []
]
],
[
# line breaks
# Input text
[
'This paragraph contains ',
'a soft line break',
'',
'This paragraph contains ',
'an hard line break',
'',
'This paragraph contains an emphasis _before_\',
'an hard line break',
'',
'This paragraph contains an emphasis ',
'_after_ an hard line break',
'',
'This paragraph _contains\',
'an emphasis_ with an hard line break in the middle',
'',
'→ This paragraph contains an hard line break ',
'and starts with the multibyte character "\u2192"',
'',
'Line breaks `',
'do\',
'not ',
'occur',
'` inside code spans'
],
# Expected text
[
'This paragraph contains a soft line break',
'',
'This paragraph contains',
'an hard line break',
'',
'This paragraph contains an emphasis before',
'an hard line break',
'',
'This paragraph contains an emphasis',
'after an hard line break',
'',
'This paragraph contains',
'an emphasis with an hard line break in the middle',
'',
'→ This paragraph contains an hard line break',
'and starts with the multibyte character "\u2192"',
'',
'Line breaks do\ not occur inside code spans'
],
# Expected text properties
[
[],
[],
[],
[],
[],
[{'col': 37, 'type': 'LspMarkdownItalic', 'length': 6}],
[],
[],
[],
[{'col': 1, 'type': 'LspMarkdownItalic', 'length': 5}],
[],
[{'col': 16, 'type': 'LspMarkdownItalic', 'length': 8}],
[{'col': 1, 'type': 'LspMarkdownItalic', 'length': 11}],
[],
[],
[],
[],
[{'col': 13, 'type': 'LspMarkdownCode', 'length': 15}]
]
]
]

Expand All @@ -193,4 +287,4 @@ def g:StartLangServer(): bool
return true
enddef

# vim: shiftwidth=2 softtabstop=2 noexpandtab
# vim: tabstop=8 shiftwidth=2 softtabstop=2 noexpandtab