Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FindTextInFiles doesn't support comma-separated globs with {} #169885

Merged
merged 10 commits into from
Feb 7, 2023
124 changes: 120 additions & 4 deletions src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,8 @@ function getNumLinesAndLastNewlineLength(text: string): { numLines: number; last
return { numLines, lastLineLength };
}

function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] {
// exported for testing
export function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] {
const args = ['--hidden'];
args.push(query.isCaseSensitive ? '--case-sensitive' : '--ignore-case');

Expand Down Expand Up @@ -495,9 +496,14 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[]
/**
* `"foo/*bar/something"` -> `["foo", "foo/*bar", "foo/*bar/something", "foo/*bar/something/**"]`
*/
function spreadGlobComponents(globArg: string): string[] {
const components = splitGlobAware(globArg, '/');
return components.map((_, i) => components.slice(0, i + 1).join('/'));
function spreadGlobComponents(globComponent: string): string[] {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the glob processing gets more complicated, it would probably be helpful to break out the code in getRgArgs with the !* etc that handles glob patterns and write a unit test for it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for now, just added unit tests for that part of getRgArgs

const globComponentWithBraceExpansion = performBraceExpansionForRipgrep(globComponent);

return globComponentWithBraceExpansion.flatMap((globArg) => {
const components = splitGlobAware(globArg, '/');
return components.map((_, i) => components.slice(0, i + 1).join('/'));
});

}

export function unicodeEscapesToPCRE2(pattern: string): string {
Expand Down Expand Up @@ -629,3 +635,113 @@ export function fixRegexNewline(pattern: string): string {
export function fixNewline(pattern: string): string {
return pattern.replace(/\n/g, '\\r?\\n');
}

// brace expansion for ripgrep

/**
* Split string given first opportunity for brace expansion in the string.
* - If the brace is prepended by a \ character, then it is escaped.
* - Does not process escapes that are within the sub-glob.
* - If two unescaped `{` occur before `}`, then ripgrep will return an error for brace nesting, so don't split on those.
*/
function getEscapeAwareSplitStringForRipgrep(pattern: string): { fixedStart?: string; strInBraces: string; fixedEnd?: string } {
let inBraces = false;
let escaped = false;
let fixedStart = '';
let strInBraces = '';
for (let i = 0; i < pattern.length; i++) {
const char = pattern[i];
switch (char) {
case '\\':
if (escaped) {
// If we're already escaped, then just leave the escaped slash and the preceeding slash that escapes it.
// The two escaped slashes will result in a single slash and whatever processes the glob later will properly process the escape
if (inBraces) {
strInBraces += '\\' + char;
} else {
fixedStart += '\\' + char;
}
escaped = false;
} else {
escaped = true;
}
break;
case '{':
if (escaped) {
// if we escaped this opening bracket, then it is to be taken literally. Remove the `\` because we've acknowleged it and add the `{` to the appropriate string
if (inBraces) {
strInBraces += char;
} else {
fixedStart += char;
}
escaped = false;
} else {
if (inBraces) {
// ripgrep treats this as attempting to do a nested alternate group, which is invalid. Return with pattern including changes from escaped braces.
return { strInBraces: fixedStart + '{' + strInBraces + '{' + pattern.substring(i + 1) };
} else {
inBraces = true;
}
}
break;
case '}':
if (escaped) {
// same as `}`, but for closing bracket
if (inBraces) {
strInBraces += char;
} else {
fixedStart += char;
}
escaped = false;
} else if (inBraces) {
// we found an end bracket to a valid opening bracket. Return the appropriate strings.
return { fixedStart, strInBraces, fixedEnd: pattern.substring(i + 1) };
} else {
// if we're not in braces and not escaped, then this is a literal `}` character and we're still adding to fixedStart.
fixedStart += char;
}
break;
default:
// similar to the `\\` case, we didn't do anything with the escape, so we should re-insert it into the appropriate string
// to be consumed later when individual parts of the glob are processed
if (inBraces) {
strInBraces += (escaped ? '\\' : '') + char;
} else {
fixedStart += (escaped ? '\\' : '') + char;
}
escaped = false;
break;
}
}


// we are haven't hit the last brace, so no splitting should occur. Return with pattern including changes from escaped braces.
return { strInBraces: fixedStart + (inBraces ? ('{' + strInBraces) : '') };
}

/**
* Parses out curly braces and returns equivalent globs. Only supports one level of nesting.
* Exported for testing.
*/
export function performBraceExpansionForRipgrep(pattern: string): string[] {
const { fixedStart, strInBraces, fixedEnd } = getEscapeAwareSplitStringForRipgrep(pattern);
if (fixedStart === undefined || fixedEnd === undefined) {
return [strInBraces];
}

let arr = splitGlobAware(strInBraces, ',');

if (!arr.length) {
// occurs if the braces are empty.
arr = [''];
}

const ends = performBraceExpansionForRipgrep(fixedEnd);

return arr.flatMap((elem) => {
const start = fixedStart + elem;
return ends.map((end) => {
return start + end;
});
});
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import * as assert from 'assert';
import { joinPath } from 'vs/base/common/resources';
import { URI } from 'vs/base/common/uri';
import { fixRegexNewline, IRgMatch, IRgMessage, RipgrepParser, unicodeEscapesToPCRE2, fixNewline } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine';
import { Range, TextSearchResult } from 'vs/workbench/services/search/common/searchExtTypes';
import { fixRegexNewline, IRgMatch, IRgMessage, RipgrepParser, unicodeEscapesToPCRE2, fixNewline, getRgArgs, performBraceExpansionForRipgrep } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine';
import { Range, TextSearchOptions, TextSearchQuery, TextSearchResult } from 'vs/workbench/services/search/common/searchExtTypes';

suite('RipgrepTextSearchEngine', () => {
test('unicodeEscapesToPCRE2', async () => {
Expand Down Expand Up @@ -296,4 +296,76 @@ suite('RipgrepTextSearchEngine', () => {
]);
});
});

suite('getRgArgs', () => {
test('simple includes', () => {
// Only testing the args that come from includes.
function testGetRgArgs(includes: string[], expectedFromIncludes: string[]): void {
const query: TextSearchQuery = {
pattern: 'test'
};

const options: TextSearchOptions = {
includes: includes,
excludes: [],
maxResults: 1000,
useIgnoreFiles: false,
followSymlinks: false,
useGlobalIgnoreFiles: false,
useParentIgnoreFiles: false,
folder: URI.file('/some/folder')
};
const expected = [
'--hidden',
'--ignore-case',
...expectedFromIncludes,
'--no-ignore',
'--crlf',
'--fixed-strings',
'--no-config',
'--no-ignore-global',
'--json',
'--',
'test',
'.'];
const result = getRgArgs(query, options);
assert.deepStrictEqual(result, expected);
}

([
[['a/*', 'b/*'], ['-g', '!*', '-g', '/a', '-g', '/a/*', '-g', '/b', '-g', '/b/*']],
[['**/a/*', 'b/*'], ['-g', '!*', '-g', '/b', '-g', '/b/*', '-g', '**/a/*']],
[['**/a/*', '**/b/*'], ['-g', '**/a/*', '-g', '**/b/*']],
[['foo/*bar/something/**'], ['-g', '!*', '-g', '/foo', '-g', '/foo/*bar', '-g', '/foo/*bar/something', '-g', '/foo/*bar/something/**']],
].forEach(([includes, expectedFromIncludes]) => testGetRgArgs(<string[]>includes, <string[]>expectedFromIncludes)));
});
});

test('brace expansion for ripgrep', () => {
function testBraceExpansion(argGlob: string, expectedGlob: string[]): void {
const result = performBraceExpansionForRipgrep(argGlob);
assert.deepStrictEqual(result, expectedGlob);
}

[
['eep/{a,b}/test', ['eep/a/test', 'eep/b/test']],
['eep/{a,b}/{c,d,e}', ['eep/a/c', 'eep/a/d', 'eep/a/e', 'eep/b/c', 'eep/b/d', 'eep/b/e']],
['eep/{a,b}/\\{c,d,e}', ['eep/a/{c,d,e}', 'eep/b/{c,d,e}']],
['eep/{a,b\\}/test', ['eep/{a,b}/test']],
['eep/{a,b\\\\}/test', ['eep/a/test', 'eep/b\\\\/test']],
['eep/{a,b\\\\\\}/test', ['eep/{a,b\\\\}/test']],
['e\\{ep/{a,b}/test', ['e{ep/a/test', 'e{ep/b/test']],
['eep/{a,\\b}/test', ['eep/a/test', 'eep/\\b/test']],
['{a/*.*,b/*.*}', ['a/*.*', 'b/*.*']],
['{{}', ['{{}']],
['aa{{}', ['aa{{}']],
['{b{}', ['{b{}']],
['{{}c', ['{{}c']],
['{{}}', ['{{}}']],
['\\{{}}', ['{}']],
['{}foo', ['foo']],
['bar{ }foo', ['bar foo']],
['{}', ['']],
].forEach(([includePattern, expectedPatterns]) => testBraceExpansion(<string>includePattern, <string[]>expectedPatterns));
});
});