Skip to content

Commit

Permalink
Check comments in programs
Browse files Browse the repository at this point in the history
  • Loading branch information
valentjn committed Jul 17, 2021
1 parent dea2341 commit 5ade1aa
Show file tree
Hide file tree
Showing 13 changed files with 678 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

## 12.4.0 (upcoming)

- Add support for checking comments in many popular programming languages (fixes [vscode-ltex#350](https://github.com/valentjn/vscode-ltex/issues/350))
- Remove support for magic comments in XHTML

## 12.3.0 (July 12, 2021)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Find more information about LT<sub>E</sub>X at the [website of vscode-ltex](http
## Features

- **Supported markup languages:** BibT<sub>E</sub>X, L<sup>A</sup>T<sub>E</sub>X, Markdown, Org, reStructuredText, R Sweave, XHTML
- Comment checking in **many popular programming languages** (optional, opt-in)
- Comes with **everything included,** no need to install Java or LanguageTool
- **Offline checking:** Does not upload anything to the internet
- Supports **over 20 languages:** English, French, German, Dutch, Chinese, Russian, etc.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import org.bsplines.ltexls.parsing.nop.NopAnnotatedTextBuilder;
import org.bsplines.ltexls.parsing.org.OrgAnnotatedTextBuilder;
import org.bsplines.ltexls.parsing.plaintext.PlaintextAnnotatedTextBuilder;
import org.bsplines.ltexls.parsing.program.ProgramAnnotatedTextBuilder;
import org.bsplines.ltexls.parsing.program.ProgramCommentPatterns;
import org.bsplines.ltexls.parsing.restructuredtext.RestructuredtextAnnotatedTextBuilder;
import org.bsplines.ltexls.settings.Settings;
import org.bsplines.ltexls.tools.Tools;
Expand Down Expand Up @@ -61,6 +63,8 @@ public static CodeAnnotatedTextBuilder create(String codeLanguageId) {

if (constructor != null) {
return constructor.apply(codeLanguageId);
} else if (ProgramCommentPatterns.isSupportedCodeLanguageId(codeLanguageId)) {
return new ProgramAnnotatedTextBuilder(codeLanguageId);
} else {
Tools.logger.warning(Tools.i18n("unsupportedCodeLanguageId", codeLanguageId));
return new PlaintextAnnotatedTextBuilder("plaintext");
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/org/bsplines/ltexls/parsing/CodeFragment.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ public CodeFragment withFromPos(int fromPos) {
return obj;
}

/**
 * Creates a new fragment from this one (via the {@code CodeFragment(CodeFragment)}
 * constructor) and assigns the given settings to the new fragment.
 *
 * @param settings settings to store in the returned fragment
 * @return the newly created fragment; this instance is not assigned to
 */
public CodeFragment withSettings(Settings settings) {
  CodeFragment copy = new CodeFragment(this);
  copy.settings = settings;
  return copy;
}

public boolean contains(LanguageToolRuleMatch match) {
return ((match.getFromPos() >= this.fromPos)
&& (match.getToPos() <= this.fromPos + this.code.length()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import org.bsplines.ltexls.parsing.nop.NopFragmentizer;
import org.bsplines.ltexls.parsing.org.OrgFragmentizer;
import org.bsplines.ltexls.parsing.plaintext.PlaintextFragmentizer;
import org.bsplines.ltexls.parsing.program.ProgramCommentPatterns;
import org.bsplines.ltexls.parsing.program.ProgramFragmentizer;
import org.bsplines.ltexls.parsing.restructuredtext.RestructuredtextFragmentizer;
import org.bsplines.ltexls.settings.Settings;
import org.bsplines.ltexls.tools.Tools;
Expand Down Expand Up @@ -62,6 +64,8 @@ public static CodeFragmentizer create(String codeLanguageId) {

if (constructor != null) {
return constructor.apply(codeLanguageId);
} else if (ProgramCommentPatterns.isSupportedCodeLanguageId(codeLanguageId)) {
return new ProgramFragmentizer(codeLanguageId);
} else {
Tools.logger.warning(Tools.i18n("unsupportedCodeLanguageId", codeLanguageId));
return new PlaintextFragmentizer("plaintext");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/* Copyright (C) 2020 Julian Valentin, LTeX Development Community
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

package org.bsplines.ltexls.parsing.program;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.bsplines.ltexls.parsing.CodeAnnotatedTextBuilder;
import org.bsplines.ltexls.parsing.markdown.MarkdownAnnotatedTextBuilder;
import org.bsplines.ltexls.tools.Tools;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.languagetool.markup.AnnotatedText;

/**
 * Annotated text builder for program source code.
 *
 * <p>The code is scanned for comments with the comment block pattern of the code language.
 * Everything outside of comments, as well as comment delimiters and per-line prefixes, is
 * passed to an internal {@link MarkdownAnnotatedTextBuilder} as markup; the remaining
 * contents of the comment lines are passed to it as Markdown code.
 */
public class ProgramAnnotatedTextBuilder extends CodeAnnotatedTextBuilder {
  private static final Pattern lineSeparatorPattern = Pattern.compile("\r?\n");
  private static final Pattern firstCharacterPattern = Pattern.compile(
      "^[ \t]*(?:([#$%*+\\-/])|(.))");

  private final MarkdownAnnotatedTextBuilder markdownAnnotatedTextBuilder;
  private final Pattern commentBlockPattern;
  private final @Nullable String lineCommentPatternString;

  /**
   * Sets up the builder with the comment patterns of the given code language.
   *
   * @param codeLanguageId ID of the code language, used to look up the comment patterns
   */
  public ProgramAnnotatedTextBuilder(String codeLanguageId) {
    super(codeLanguageId);

    ProgramCommentPatterns patterns = new ProgramCommentPatterns(codeLanguageId);
    this.markdownAnnotatedTextBuilder = new MarkdownAnnotatedTextBuilder("markdown");
    this.commentBlockPattern = patterns.getCommentBlockPattern();
    this.lineCommentPatternString = patterns.getLineCommentPatternString();
  }

  /**
   * Adds program code: every comment found by the comment block pattern is processed
   * line by line, the code between comments is added as markup.
   *
   * @param code program source code
   * @return this builder
   */
  @Override
  public CodeAnnotatedTextBuilder addCode(String code) {
    Matcher matcher = this.commentBlockPattern.matcher(code);
    int position = 0;

    while (matcher.find()) {
      boolean isLineComment = (matcher.group("lineComment") != null);
      String groupName = "blockComment";

      if (isLineComment) {
        groupName = "lineComment";
      }

      // code between the previous comment and this one is markup, given "\n\n" as second argument
      int commentStart = matcher.start(groupName);
      this.markdownAnnotatedTextBuilder.addMarkup(code.substring(position, commentStart), "\n\n");
      position = commentStart;

      @Nullable String comment = matcher.group(groupName);

      if (comment != null) {
        addComment(comment, isLineComment);
        position = matcher.end(groupName);
      } else {
        Tools.logger.warning(Tools.i18n(
            "couldNotFindExpectedGroupInRegularExpressionMatch", groupName));
      }
    }

    // trailing code after the last comment
    if (position < code.length()) {
      this.markdownAnnotatedTextBuilder.addMarkup(code.substring(position));
    }

    return this;
  }

  /**
   * Adds the contents of a single comment. Leading whitespace, the line comment marker
   * (for line comments), and a decoration character shared by all non-blank lines are
   * added as markup; the rest of each line is added as Markdown code.
   *
   * @param comment text of the comment as captured by the comment block pattern
   * @param isLineComment whether the comment consists of line comments
   * @return this builder
   */
  private CodeAnnotatedTextBuilder addComment(String comment, boolean isLineComment) {
    String commonFirstCharacter = findCommonFirstCharacter(comment);
    String lineCommentPrefix = "";

    if (isLineComment && (this.lineCommentPatternString != null)) {
      lineCommentPrefix = this.lineCommentPatternString;
    }

    Pattern lineContentsPattern = Pattern.compile(
        "[ \t]*" + lineCommentPrefix
        + "(?:" + Pattern.quote(commonFirstCharacter) + ")?[ \t]*(.*?)(?:\r?\n|$)");
    Matcher matcher = lineContentsPattern.matcher(comment);
    int position = 0;

    while (matcher.find()) {
      // everything up to the line contents (line break, indentation, prefix) is markup
      int contentsStart = matcher.start(1);
      this.markdownAnnotatedTextBuilder.addMarkup(
          comment.substring(position, contentsStart), "\n");

      int contentsEnd = matcher.end(1);
      this.markdownAnnotatedTextBuilder.addCode(comment.substring(contentsStart, contentsEnd));
      position = contentsEnd;
    }

    if (position < comment.length()) {
      this.markdownAnnotatedTextBuilder.addMarkup(comment.substring(position));
    }

    return this;
  }

  /**
   * Determines the decoration character (one of {@code # $ % * + - /}) that all lines of
   * the comment start with after optional indentation. Lines without any character after
   * the indentation are ignored.
   *
   * @param comment text of the comment
   * @return the shared first character, or the empty string if the lines do not share one
   */
  private static String findCommonFirstCharacter(String comment) {
    String candidate = "";

    for (String line : lineSeparatorPattern.split(comment)) {
      Matcher matcher = firstCharacterPattern.matcher(line);

      if (!matcher.find()) {
        continue;
      }

      @Nullable String firstCharacter = matcher.group(1);

      // the line starts with a character that is not a decoration character
      if (firstCharacter == null) {
        return "";
      }

      if (candidate.isEmpty()) {
        candidate = firstCharacter;
      } else if (!candidate.equals(firstCharacter)) {
        return "";
      }
    }

    return candidate;
  }

  /**
   * Builds the annotated text from everything added so far.
   *
   * @return annotated text built by the internal Markdown builder
   */
  @Override
  public AnnotatedText build() {
    return this.markdownAnnotatedTextBuilder.build();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/* Copyright (C) 2020 Julian Valentin, LTeX Development Community
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

package org.bsplines.ltexls.parsing.program;

import java.util.regex.Pattern;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * Regular expression fragments describing the comment syntax of a programming language,
 * selected by code language ID, and the patterns assembled from them.
 *
 * <p>For an unknown code language ID, all three pattern strings stay {@code null}; use
 * {@link #isSupportedCodeLanguageId(String)} to check beforehand. In that case, the
 * patterns returned by {@link #getCommentBlockPattern()} and
 * {@link #getMagicCommentPattern()} are compiled from the empty string.
 */
public class ProgramCommentPatterns {
  private @MonotonicNonNull String blockCommentStartPatternString;
  private @MonotonicNonNull String blockCommentEndPatternString;
  private @MonotonicNonNull String lineCommentPatternString;

  /**
   * Selects the comment pattern strings for the given code language.
   *
   * @param codeLanguageId ID of the code language (e.g., {@code "java"} or {@code "python"})
   */
  public ProgramCommentPatterns(String codeLanguageId) {
    switch (codeLanguageId) {
      case "c":
      case "cpp":
      case "csharp":
      case "dart":
      case "fsharp":
      case "go":
      case "groovy":
      case "java":
      case "javascript":
      case "javascriptreact":
      case "kotlin":
      case "php":
      case "rust":
      case "scala":
      case "swift":
      case "typescript":
      case "typescriptreact":
      case "verilog":
        this.blockCommentStartPatternString = "/\\*\\*?";
        this.blockCommentEndPatternString = "\\*\\*?/";
        this.lineCommentPatternString = "///?";
        break;
      case "elixir":
      case "python":
        this.blockCommentStartPatternString = "\"\"\"";
        this.blockCommentEndPatternString = "\"\"\"";
        this.lineCommentPatternString = "##?";
        break;
      case "powershell":
        this.blockCommentStartPatternString = "<#";
        this.blockCommentEndPatternString = "#>";
        this.lineCommentPatternString = "##?";
        break;
      case "coffeescript":
      case "julia":
      case "perl":
      case "perl6":
      case "puppet":
      case "r":
      case "ruby":
      case "shellscript":
        this.lineCommentPatternString = "##?";
        break;
      case "lua":
        this.blockCommentStartPatternString = "--\\[\\[";
        this.blockCommentEndPatternString = "\\]\\]";
        this.lineCommentPatternString = "---?";
        break;
      case "elm":
      case "haskell":
        this.blockCommentStartPatternString = "\\{-";
        this.blockCommentEndPatternString = "-\\}";
        this.lineCommentPatternString = "---?";
        break;
      case "sql":
        this.lineCommentPatternString = "---?";
        break;
      case "clojure":
      case "lisp":
        this.lineCommentPatternString = ";;?";
        break;
      case "matlab":
        this.blockCommentStartPatternString = "%\\{";
        this.blockCommentEndPatternString = "%\\}";
        this.lineCommentPatternString = "%%?";
        break;
      case "erlang":
        this.lineCommentPatternString = "%%?";
        break;
      case "fortran-modern":
        // Free-form Fortran starts comments with "!"; the previously supported "c" marker is
        // kept for backward compatibility. The group is non-capturing so that the numbering
        // of the capturing groups in the assembled patterns does not change.
        this.lineCommentPatternString = "(?:c|!!?)";
        break;
      case "vb":
        this.lineCommentPatternString = "''?";
        break;
      default:
        // unsupported code language: leave all pattern strings null
        break;
    }
  }

  /**
   * Checks whether comment patterns are known for the given code language.
   *
   * @param codeLanguageId ID of the code language
   * @return {@code true} if at least one comment pattern string is defined for the language
   */
  public static boolean isSupportedCodeLanguageId(String codeLanguageId) {
    ProgramCommentPatterns patterns = new ProgramCommentPatterns(codeLanguageId);
    return ((patterns.blockCommentStartPatternString != null)
        || (patterns.blockCommentEndPatternString != null)
        || (patterns.lineCommentPatternString != null));
  }

  /**
   * Returns the regular expression string matching the start delimiter of block comments.
   *
   * @return pattern string, or {@code null} if the language has no block comments defined
   */
  public @Nullable String getBlockCommentStartPatternString() {
    return this.blockCommentStartPatternString;
  }

  /**
   * Returns the regular expression string matching the end delimiter of block comments.
   *
   * @return pattern string, or {@code null} if the language has no block comments defined
   */
  public @Nullable String getBlockCommentEndPatternString() {
    return this.blockCommentEndPatternString;
  }

  /**
   * Returns the regular expression string matching the marker of line comments.
   *
   * @return pattern string, or {@code null} if the language has no line comments defined
   */
  public @Nullable String getLineCommentPatternString() {
    return this.lineCommentPatternString;
  }

  /**
   * Builds the multi-line pattern that finds comments in program code. Contents of block
   * comments are captured in the named group {@code blockComment}; runs of consecutive
   * line comments (including their markers) are captured in the named group
   * {@code lineComment}. A named group only exists if the corresponding pattern strings
   * are defined for the language.
   *
   * @return compiled pattern with {@link Pattern#MULTILINE} enabled
   */
  public Pattern getCommentBlockPattern() {
    StringBuilder patternStringBuilder = new StringBuilder();

    if ((this.blockCommentStartPatternString != null)
          && (this.blockCommentEndPatternString != null)) {
      // block comment on its own lines: start delimiter, lazily matched contents that do not
      // contain the end delimiter, end delimiter
      patternStringBuilder.append("^[ \t]*" + this.blockCommentStartPatternString
          + "(?:[ \t]|$)(?<blockComment>(?:(?!" + this.blockCommentEndPatternString
          + ").|\r?\n)*?)(?:[ \t]|^)" + this.blockCommentEndPatternString + "[ \t]*$");
    }

    if (this.lineCommentPatternString != null) {
      if (patternStringBuilder.length() > 0) {
        patternStringBuilder.append("|");
      }

      // one or more consecutive lines that each start with the line comment marker
      patternStringBuilder.append("(?<lineComment>(?:^[ \t]*" + this.lineCommentPatternString
          + "[ \t](?:.*?)$(?:\r?\n)?)+)");
    }

    return Pattern.compile(patternStringBuilder.toString(), Pattern.MULTILINE);
  }

  /**
   * Builds the multi-line pattern that finds magic comments of the form {@code ltex: ...}
   * (the keyword is matched case-insensitively), written as a single-line block comment or
   * as a line comment. The text after the colon is captured in an unnamed group, one per
   * alternative.
   *
   * @return compiled pattern with {@link Pattern#MULTILINE} enabled
   */
  public Pattern getMagicCommentPattern() {
    StringBuilder patternStringBuilder = new StringBuilder();

    if ((this.blockCommentStartPatternString != null)
          && (this.blockCommentEndPatternString != null)) {
      patternStringBuilder.append("^[ \t]*" + this.blockCommentStartPatternString
          + "[ \t]*(?i)ltex(?-i):(.*?)[ \t]*" + this.blockCommentEndPatternString + "[ \t]*$");
    }

    if (this.lineCommentPatternString != null) {
      if (patternStringBuilder.length() > 0) {
        patternStringBuilder.append("|");
      }

      patternStringBuilder.append(
          "^[ \t]*" + this.lineCommentPatternString
          + "[ \t]*(?i)ltex(?-i):(.*?)[ \t]*$");
    }

    return Pattern.compile(patternStringBuilder.toString(), Pattern.MULTILINE);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/* Copyright (C) 2020 Julian Valentin, LTeX Development Community
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

package org.bsplines.ltexls.parsing.program;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.bsplines.ltexls.parsing.CodeFragment;
import org.bsplines.ltexls.parsing.RegexCodeFragmentizer;
import org.bsplines.ltexls.settings.Settings;

/**
 * Fragmentizer for program source code. Fragments are split at magic comments by the
 * regular-expression-based superclass; afterwards, the settings of every fragment are
 * adapted for checking comments in programs.
 */
public class ProgramFragmentizer extends RegexCodeFragmentizer {
  // IDs of rules that are disabled in program comments unless explicitly enabled
  private static final String[] ruleIdsToDisable = {
    "COPYRIGHT",
    "DASH_RULE",
    "R_SYMBOL",
    "UPPERCASE_SENTENCE_START",
    "WHITESPACE_RULE",
  };

  /**
   * Creates a fragmentizer that uses the magic comment pattern of the given code language.
   *
   * @param codeLanguageId ID of the code language
   */
  public ProgramFragmentizer(String codeLanguageId) {
    super(codeLanguageId, (new ProgramCommentPatterns(codeLanguageId)).getMagicCommentPattern());
  }

  /**
   * Fragmentizes the code with the superclass implementation and replaces the settings of
   * each resulting fragment by settings adapted for program comments.
   *
   * @param code program source code
   * @param originalSettings settings to start from
   * @return new list of fragments with adapted settings
   */
  @Override
  public List<CodeFragment> fragmentize(String code, Settings originalSettings) {
    List<CodeFragment> adaptedFragments = new ArrayList<>();

    for (CodeFragment fragment : super.fragmentize(code, originalSettings)) {
      adaptedFragments.add(fragment.withSettings(adaptSettings(fragment.getSettings())));
    }

    return adaptedFragments;
  }

  /**
   * Derives settings for program comments: the words {@code @param}, {@code param}, and
   * {@code @return} are added to the dictionary, and the rules in
   * {@code ruleIdsToDisable} are added to the disabled rules unless they are contained in
   * the enabled rules of the given settings.
   *
   * @param settings settings of a fragment
   * @return settings with extended dictionary and disabled rules
   */
  private static Settings adaptSettings(Settings settings) {
    HashSet<String> dictionary = new HashSet<>(settings.getDictionary());
    dictionary.add("@param");
    dictionary.add("param");
    dictionary.add("@return");

    HashSet<String> disabledRules = new HashSet<>(settings.getDisabledRules());

    for (String ruleId : ruleIdsToDisable) {
      boolean explicitlyEnabled = settings.getEnabledRules().contains(ruleId);

      if (!explicitlyEnabled) {
        disabledRules.add(ruleId);
      }
    }

    return settings.withDictionary(dictionary).withDisabledRules(disabledRules);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/* Copyright (C) 2020 Julian Valentin, LTeX Development Community
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

@DefaultQualifier(NonNull.class)
package org.bsplines.ltexls.parsing.program;

import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.framework.qual.DefaultQualifier;
Loading

0 comments on commit 5ade1aa

Please sign in to comment.