FindTextInFiles doesn't support comma-separated globs with {} (micr…

…osoft#169885) * `FindTextInFiles` doesn't support comma-separated globs with {} Fixes microsoft#169422
c-claeys · Feb 16, 2023 · e4fbe1d · e4fbe1d
1 parent df99ecc
commit e4fbe1d
Show file tree

Hide file tree

Showing 2 changed files with 194 additions and 6 deletions.
diff --git a/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts b/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts
@@ -368,7 +368,8 @@ function getNumLinesAndLastNewlineLength(text: string): { numLines: number; last
  return { numLines, lastLineLength };
 }
 
-function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] {
+// exported for testing
+export function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] {
  const args = ['--hidden'];
  args.push(query.isCaseSensitive ? '--case-sensitive' : '--ignore-case');
 
@@ -495,9 +496,14 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[]
 /**
  * `"foo/*bar/something"` -> `["foo", "foo/*bar", "foo/*bar/something", "foo/*bar/something/**"]`
  */
-function spreadGlobComponents(globArg: string): string[] {
- const components = splitGlobAware(globArg, '/');
- return components.map((_, i) => components.slice(0, i + 1).join('/'));
+function spreadGlobComponents(globComponent: string): string[] {
+ const globComponentWithBraceExpansion = performBraceExpansionForRipgrep(globComponent);
+
+ return globComponentWithBraceExpansion.flatMap((globArg) => {
+ const components = splitGlobAware(globArg, '/');
+ return components.map((_, i) => components.slice(0, i + 1).join('/'));
+ });
+
 }
 
 export function unicodeEscapesToPCRE2(pattern: string): string {
@@ -629,3 +635,113 @@ export function fixRegexNewline(pattern: string): string {
 export function fixNewline(pattern: string): string {
  return pattern.replace(/\n/g, '\\r?\\n');
 }
+
+// brace expansion for ripgrep
+
+/**
+ * Split string given first opportunity for brace expansion in the string.
+ * - If the brace is prepended by a \ character, then it is escaped.
+ * - Does not process escapes that are within the sub-glob.
+ * - If two unescaped `{` occur before `}`, then ripgrep will return an error for brace nesting, so don't split on those.
+ */
+function getEscapeAwareSplitStringForRipgrep(pattern: string): { fixedStart?: string; strInBraces: string; fixedEnd?: string } {
+ let inBraces = false;
+ let escaped = false;
+ let fixedStart = '';
+ let strInBraces = '';
+ for (let i = 0; i < pattern.length; i++) {
+ const char = pattern[i];
+ switch (char) {
+ case '\\':
+ if (escaped) {
+ // If we're already escaped, then just leave the escaped slash and the preceeding slash that escapes it.
+ // The two escaped slashes will result in a single slash and whatever processes the glob later will properly process the escape
+ if (inBraces) {
+ strInBraces += '\\' + char;
+ } else {
+ fixedStart += '\\' + char;
+ }
+ escaped = false;
+ } else {
+ escaped = true;
+ }
+ break;
+ case '{':
+ if (escaped) {
+ // if we escaped this opening bracket, then it is to be taken literally. Remove the `\` because we've acknowleged it and add the `{` to the appropriate string
+ if (inBraces) {
+ strInBraces += char;
+ } else {
+ fixedStart += char;
+ }
+ escaped = false;
+ } else {
+ if (inBraces) {
+ // ripgrep treats this as attempting to do a nested alternate group, which is invalid. Return with pattern including changes from escaped braces.
+ return { strInBraces: fixedStart + '{' + strInBraces + '{' + pattern.substring(i + 1) };
+ } else {
+ inBraces = true;
+ }
+ }
+ break;
+ case '}':
+ if (escaped) {
+ // same as `}`, but for closing bracket
+ if (inBraces) {
+ strInBraces += char;
+ } else {
+ fixedStart += char;
+ }
+ escaped = false;
+ } else if (inBraces) {
+ // we found an end bracket to a valid opening bracket. Return the appropriate strings.
+ return { fixedStart, strInBraces, fixedEnd: pattern.substring(i + 1) };
+ } else {
+ // if we're not in braces and not escaped, then this is a literal `}` character and we're still adding to fixedStart.
+ fixedStart += char;
+ }
+ break;
+ default:
+ // similar to the `\\` case, we didn't do anything with the escape, so we should re-insert it into the appropriate string
+ // to be consumed later when individual parts of the glob are processed
+ if (inBraces) {
+ strInBraces += (escaped ? '\\' : '') + char;
+ } else {
+ fixedStart += (escaped ? '\\' : '') + char;
+ }
+ escaped = false;
+ break;
+ }
+ }
+
+
+ // we are haven't hit the last brace, so no splitting should occur. Return with pattern including changes from escaped braces.
+ return { strInBraces: fixedStart + (inBraces ? ('{' + strInBraces) : '') };
+}
+
+/**
+ * Parses out curly braces and returns equivalent globs. Only supports one level of nesting.
+ * Exported for testing.
+ */
+export function performBraceExpansionForRipgrep(pattern: string): string[] {
+ const { fixedStart, strInBraces, fixedEnd } = getEscapeAwareSplitStringForRipgrep(pattern);
+ if (fixedStart === undefined || fixedEnd === undefined) {
+ return [strInBraces];
+ }
+
+ let arr = splitGlobAware(strInBraces, ',');
+
+ if (!arr.length) {
+ // occurs if the braces are empty.
+ arr = [''];
+ }
+
+ const ends = performBraceExpansionForRipgrep(fixedEnd);
+
+ return arr.flatMap((elem) => {
+ const start = fixedStart + elem;
+ return ends.map((end) => {
+ return start + end;
+ });
+ });
+}
diff --git a/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngineUtils.test.ts b/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngineUtils.test.ts
@@ -6,8 +6,8 @@
 import * as assert from 'assert';
 import { joinPath } from 'vs/base/common/resources';
 import { URI } from 'vs/base/common/uri';
-import { fixRegexNewline, IRgMatch, IRgMessage, RipgrepParser, unicodeEscapesToPCRE2, fixNewline } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine';
-import { Range, TextSearchResult } from 'vs/workbench/services/search/common/searchExtTypes';
+import { fixRegexNewline, IRgMatch, IRgMessage, RipgrepParser, unicodeEscapesToPCRE2, fixNewline, getRgArgs, performBraceExpansionForRipgrep } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine';
+import { Range, TextSearchOptions, TextSearchQuery, TextSearchResult } from 'vs/workbench/services/search/common/searchExtTypes';
 
 suite('RipgrepTextSearchEngine', () => {
  test('unicodeEscapesToPCRE2', async () => {
@@ -296,4 +296,76 @@ suite('RipgrepTextSearchEngine', () => {
  ]);
  });
  });
+
+ suite('getRgArgs', () => {
+ test('simple includes', () => {
+ // Only testing the args that come from includes.
+ function testGetRgArgs(includes: string[], expectedFromIncludes: string[]): void {
+ const query: TextSearchQuery = {
+ pattern: 'test'
+ };
+
+ const options: TextSearchOptions = {
+ includes: includes,
+ excludes: [],
+ maxResults: 1000,
+ useIgnoreFiles: false,
+ followSymlinks: false,
+ useGlobalIgnoreFiles: false,
+ useParentIgnoreFiles: false,
+ folder: URI.file('/some/folder')
+ };
+ const expected = [
+ '--hidden',
+ '--ignore-case',
+ ...expectedFromIncludes,
+ '--no-ignore',
+ '--crlf',
+ '--fixed-strings',
+ '--no-config',
+ '--no-ignore-global',
+ '--json',
+ '--',
+ 'test',
+ '.'];
+ const result = getRgArgs(query, options);
+ assert.deepStrictEqual(result, expected);
+ }
+
+ ([
+ [['a/*', 'b/*'], ['-g', '!*', '-g', '/a', '-g', '/a/*', '-g', '/b', '-g', '/b/*']],
+ [['**/a/*', 'b/*'], ['-g', '!*', '-g', '/b', '-g', '/b/*', '-g', '**/a/*']],
+ [['**/a/*', '**/b/*'], ['-g', '**/a/*', '-g', '**/b/*']],
+ [['foo/*bar/something/**'], ['-g', '!*', '-g', '/foo', '-g', '/foo/*bar', '-g', '/foo/*bar/something', '-g', '/foo/*bar/something/**']],
+ ].forEach(([includes, expectedFromIncludes]) => testGetRgArgs(<string[]>includes, <string[]>expectedFromIncludes)));
+ });
+ });
+
+ test('brace expansion for ripgrep', () => {
+ function testBraceExpansion(argGlob: string, expectedGlob: string[]): void {
+ const result = performBraceExpansionForRipgrep(argGlob);
+ assert.deepStrictEqual(result, expectedGlob);
+ }
+
+ [
+ ['eep/{a,b}/test', ['eep/a/test', 'eep/b/test']],
+ ['eep/{a,b}/{c,d,e}', ['eep/a/c', 'eep/a/d', 'eep/a/e', 'eep/b/c', 'eep/b/d', 'eep/b/e']],
+ ['eep/{a,b}/\\{c,d,e}', ['eep/a/{c,d,e}', 'eep/b/{c,d,e}']],
+ ['eep/{a,b\\}/test', ['eep/{a,b}/test']],
+ ['eep/{a,b\\\\}/test', ['eep/a/test', 'eep/b\\\\/test']],
+ ['eep/{a,b\\\\\\}/test', ['eep/{a,b\\\\}/test']],
+ ['e\\{ep/{a,b}/test', ['e{ep/a/test', 'e{ep/b/test']],
+ ['eep/{a,\\b}/test', ['eep/a/test', 'eep/\\b/test']],
+ ['{a/*.*,b/*.*}', ['a/*.*', 'b/*.*']],
+ ['{{}', ['{{}']],
+ ['aa{{}', ['aa{{}']],
+ ['{b{}', ['{b{}']],
+ ['{{}c', ['{{}c']],
+ ['{{}}', ['{{}}']],
+ ['\\{{}}', ['{}']],
+ ['{}foo', ['foo']],
+ ['bar{ }foo', ['bar foo']],
+ ['{}', ['']],
+ ].forEach(([includePattern, expectedPatterns]) => testBraceExpansion(<string>includePattern, <string[]>expectedPatterns));
+ });
 });