Skip to content

Commit

Permalink
FindTextInFiles doesn't support comma-separated globs with {} (micr…
Browse files Browse the repository at this point in the history
…osoft#169885)

* `FindTextInFiles` doesn't support comma-separated globs with {}
Fixes microsoft#169422
  • Loading branch information
andreamah authored and c-claeys committed Feb 16, 2023
1 parent df99ecc commit e4fbe1d
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 6 deletions.
124 changes: 120 additions & 4 deletions src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,8 @@ function getNumLinesAndLastNewlineLength(text: string): { numLines: number; last
return { numLines, lastLineLength };
}

function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] {
// exported for testing
export function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] {
const args = ['--hidden'];
args.push(query.isCaseSensitive ? '--case-sensitive' : '--ignore-case');

Expand Down Expand Up @@ -495,9 +496,14 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[]
/**
* `"foo/*bar/something"` -> `["foo", "foo/*bar", "foo/*bar/something", "foo/*bar/something/**"]`
*/
function spreadGlobComponents(globArg: string): string[] {
const components = splitGlobAware(globArg, '/');
return components.map((_, i) => components.slice(0, i + 1).join('/'));
function spreadGlobComponents(globComponent: string): string[] {
const globComponentWithBraceExpansion = performBraceExpansionForRipgrep(globComponent);

return globComponentWithBraceExpansion.flatMap((globArg) => {
const components = splitGlobAware(globArg, '/');
return components.map((_, i) => components.slice(0, i + 1).join('/'));
});

}

export function unicodeEscapesToPCRE2(pattern: string): string {
Expand Down Expand Up @@ -629,3 +635,113 @@ export function fixRegexNewline(pattern: string): string {
export function fixNewline(pattern: string): string {
return pattern.replace(/\n/g, '\\r?\\n');
}

// brace expansion for ripgrep

/**
* Split string given first opportunity for brace expansion in the string.
* - If the brace is prepended by a \ character, then it is escaped.
* - Does not process escapes that are within the sub-glob.
* - If two unescaped `{` occur before `}`, then ripgrep will return an error for brace nesting, so don't split on those.
*/
function getEscapeAwareSplitStringForRipgrep(pattern: string): { fixedStart?: string; strInBraces: string; fixedEnd?: string } {
let inBraces = false;
let escaped = false;
let fixedStart = '';
let strInBraces = '';
for (let i = 0; i < pattern.length; i++) {
const char = pattern[i];
switch (char) {
case '\\':
if (escaped) {
// If we're already escaped, then just leave the escaped slash and the preceeding slash that escapes it.
// The two escaped slashes will result in a single slash and whatever processes the glob later will properly process the escape
if (inBraces) {
strInBraces += '\\' + char;
} else {
fixedStart += '\\' + char;
}
escaped = false;
} else {
escaped = true;
}
break;
case '{':
if (escaped) {
// if we escaped this opening bracket, then it is to be taken literally. Remove the `\` because we've acknowleged it and add the `{` to the appropriate string
if (inBraces) {
strInBraces += char;
} else {
fixedStart += char;
}
escaped = false;
} else {
if (inBraces) {
// ripgrep treats this as attempting to do a nested alternate group, which is invalid. Return with pattern including changes from escaped braces.
return { strInBraces: fixedStart + '{' + strInBraces + '{' + pattern.substring(i + 1) };
} else {
inBraces = true;
}
}
break;
case '}':
if (escaped) {
// same as `}`, but for closing bracket
if (inBraces) {
strInBraces += char;
} else {
fixedStart += char;
}
escaped = false;
} else if (inBraces) {
// we found an end bracket to a valid opening bracket. Return the appropriate strings.
return { fixedStart, strInBraces, fixedEnd: pattern.substring(i + 1) };
} else {
// if we're not in braces and not escaped, then this is a literal `}` character and we're still adding to fixedStart.
fixedStart += char;
}
break;
default:
// similar to the `\\` case, we didn't do anything with the escape, so we should re-insert it into the appropriate string
// to be consumed later when individual parts of the glob are processed
if (inBraces) {
strInBraces += (escaped ? '\\' : '') + char;
} else {
fixedStart += (escaped ? '\\' : '') + char;
}
escaped = false;
break;
}
}


// we are haven't hit the last brace, so no splitting should occur. Return with pattern including changes from escaped braces.
return { strInBraces: fixedStart + (inBraces ? ('{' + strInBraces) : '') };
}

/**
* Parses out curly braces and returns equivalent globs. Only supports one level of nesting.
* Exported for testing.
*/
export function performBraceExpansionForRipgrep(pattern: string): string[] {
const { fixedStart, strInBraces, fixedEnd } = getEscapeAwareSplitStringForRipgrep(pattern);
if (fixedStart === undefined || fixedEnd === undefined) {
return [strInBraces];
}

let arr = splitGlobAware(strInBraces, ',');

if (!arr.length) {
// occurs if the braces are empty.
arr = [''];
}

const ends = performBraceExpansionForRipgrep(fixedEnd);

return arr.flatMap((elem) => {
const start = fixedStart + elem;
return ends.map((end) => {
return start + end;
});
});
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import * as assert from 'assert';
import { joinPath } from 'vs/base/common/resources';
import { URI } from 'vs/base/common/uri';
import { fixRegexNewline, IRgMatch, IRgMessage, RipgrepParser, unicodeEscapesToPCRE2, fixNewline } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine';
import { Range, TextSearchResult } from 'vs/workbench/services/search/common/searchExtTypes';
import { fixRegexNewline, IRgMatch, IRgMessage, RipgrepParser, unicodeEscapesToPCRE2, fixNewline, getRgArgs, performBraceExpansionForRipgrep } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine';
import { Range, TextSearchOptions, TextSearchQuery, TextSearchResult } from 'vs/workbench/services/search/common/searchExtTypes';

suite('RipgrepTextSearchEngine', () => {
test('unicodeEscapesToPCRE2', async () => {
Expand Down Expand Up @@ -296,4 +296,76 @@ suite('RipgrepTextSearchEngine', () => {
]);
});
});

suite('getRgArgs', () => {
test('simple includes', () => {
// Only testing the args that come from includes.
function testGetRgArgs(includes: string[], expectedFromIncludes: string[]): void {
const query: TextSearchQuery = {
pattern: 'test'
};

const options: TextSearchOptions = {
includes: includes,
excludes: [],
maxResults: 1000,
useIgnoreFiles: false,
followSymlinks: false,
useGlobalIgnoreFiles: false,
useParentIgnoreFiles: false,
folder: URI.file('/some/folder')
};
const expected = [
'--hidden',
'--ignore-case',
...expectedFromIncludes,
'--no-ignore',
'--crlf',
'--fixed-strings',
'--no-config',
'--no-ignore-global',
'--json',
'--',
'test',
'.'];
const result = getRgArgs(query, options);
assert.deepStrictEqual(result, expected);
}

([
[['a/*', 'b/*'], ['-g', '!*', '-g', '/a', '-g', '/a/*', '-g', '/b', '-g', '/b/*']],
[['**/a/*', 'b/*'], ['-g', '!*', '-g', '/b', '-g', '/b/*', '-g', '**/a/*']],
[['**/a/*', '**/b/*'], ['-g', '**/a/*', '-g', '**/b/*']],
[['foo/*bar/something/**'], ['-g', '!*', '-g', '/foo', '-g', '/foo/*bar', '-g', '/foo/*bar/something', '-g', '/foo/*bar/something/**']],
].forEach(([includes, expectedFromIncludes]) => testGetRgArgs(<string[]>includes, <string[]>expectedFromIncludes)));
});
});

test('brace expansion for ripgrep', () => {
function testBraceExpansion(argGlob: string, expectedGlob: string[]): void {
const result = performBraceExpansionForRipgrep(argGlob);
assert.deepStrictEqual(result, expectedGlob);
}

[
['eep/{a,b}/test', ['eep/a/test', 'eep/b/test']],
['eep/{a,b}/{c,d,e}', ['eep/a/c', 'eep/a/d', 'eep/a/e', 'eep/b/c', 'eep/b/d', 'eep/b/e']],
['eep/{a,b}/\\{c,d,e}', ['eep/a/{c,d,e}', 'eep/b/{c,d,e}']],
['eep/{a,b\\}/test', ['eep/{a,b}/test']],
['eep/{a,b\\\\}/test', ['eep/a/test', 'eep/b\\\\/test']],
['eep/{a,b\\\\\\}/test', ['eep/{a,b\\\\}/test']],
['e\\{ep/{a,b}/test', ['e{ep/a/test', 'e{ep/b/test']],
['eep/{a,\\b}/test', ['eep/a/test', 'eep/\\b/test']],
['{a/*.*,b/*.*}', ['a/*.*', 'b/*.*']],
['{{}', ['{{}']],
['aa{{}', ['aa{{}']],
['{b{}', ['{b{}']],
['{{}c', ['{{}c']],
['{{}}', ['{{}}']],
['\\{{}}', ['{}']],
['{}foo', ['foo']],
['bar{ }foo', ['bar foo']],
['{}', ['']],
].forEach(([includePattern, expectedPatterns]) => testBraceExpansion(<string>includePattern, <string[]>expectedPatterns));
});
});

0 comments on commit e4fbe1d

Please sign in to comment.