diff --git a/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts b/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts index c70e5b554ff1d2..e51fbdcaa41849 100644 --- a/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts +++ b/src/vs/workbench/services/search/node/ripgrepTextSearchEngine.ts @@ -368,7 +368,8 @@ function getNumLinesAndLastNewlineLength(text: string): { numLines: number; last return { numLines, lastLineLength }; } -function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] { +// exported for testing +export function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] { const args = ['--hidden']; args.push(query.isCaseSensitive ? '--case-sensitive' : '--ignore-case'); @@ -495,9 +496,14 @@ function getRgArgs(query: TextSearchQuery, options: TextSearchOptions): string[] /** * `"foo/*bar/something"` -> `["foo", "foo/*bar", "foo/*bar/something", "foo/*bar/something/**"]` */ -function spreadGlobComponents(globArg: string): string[] { - const components = splitGlobAware(globArg, '/'); - return components.map((_, i) => components.slice(0, i + 1).join('/')); +function spreadGlobComponents(globComponent: string): string[] { + const globComponentWithBraceExpansion = performBraceExpansionForRipgrep(globComponent); + + return globComponentWithBraceExpansion.flatMap((globArg) => { + const components = splitGlobAware(globArg, '/'); + return components.map((_, i) => components.slice(0, i + 1).join('/')); + }); + } export function unicodeEscapesToPCRE2(pattern: string): string { @@ -629,3 +635,113 @@ export function fixRegexNewline(pattern: string): string { export function fixNewline(pattern: string): string { return pattern.replace(/\n/g, '\\r?\\n'); } + +// brace expansion for ripgrep + +/** + * Split string given first opportunity for brace expansion in the string. + * - If the brace is prepended by a \ character, then it is escaped. + * - Does not process escapes that are within the sub-glob. + * - If two unescaped `{` occur before `}`, then ripgrep will return an error for brace nesting, so don't split on those. + */ +function getEscapeAwareSplitStringForRipgrep(pattern: string): { fixedStart?: string; strInBraces: string; fixedEnd?: string } { + let inBraces = false; + let escaped = false; + let fixedStart = ''; + let strInBraces = ''; + for (let i = 0; i < pattern.length; i++) { + const char = pattern[i]; + switch (char) { + case '\\': + if (escaped) { + // If we're already escaped, then just leave the escaped slash and the preceeding slash that escapes it. + // The two escaped slashes will result in a single slash and whatever processes the glob later will properly process the escape + if (inBraces) { + strInBraces += '\\' + char; + } else { + fixedStart += '\\' + char; + } + escaped = false; + } else { + escaped = true; + } + break; + case '{': + if (escaped) { + // if we escaped this opening bracket, then it is to be taken literally. Remove the `\` because we've acknowleged it and add the `{` to the appropriate string + if (inBraces) { + strInBraces += char; + } else { + fixedStart += char; + } + escaped = false; + } else { + if (inBraces) { + // ripgrep treats this as attempting to do a nested alternate group, which is invalid. Return with pattern including changes from escaped braces. + return { strInBraces: fixedStart + '{' + strInBraces + '{' + pattern.substring(i + 1) }; + } else { + inBraces = true; + } + } + break; + case '}': + if (escaped) { + // same as `}`, but for closing bracket + if (inBraces) { + strInBraces += char; + } else { + fixedStart += char; + } + escaped = false; + } else if (inBraces) { + // we found an end bracket to a valid opening bracket. Return the appropriate strings. + return { fixedStart, strInBraces, fixedEnd: pattern.substring(i + 1) }; + } else { + // if we're not in braces and not escaped, then this is a literal `}` character and we're still adding to fixedStart. + fixedStart += char; + } + break; + default: + // similar to the `\\` case, we didn't do anything with the escape, so we should re-insert it into the appropriate string + // to be consumed later when individual parts of the glob are processed + if (inBraces) { + strInBraces += (escaped ? '\\' : '') + char; + } else { + fixedStart += (escaped ? '\\' : '') + char; + } + escaped = false; + break; + } + } + + + // we are haven't hit the last brace, so no splitting should occur. Return with pattern including changes from escaped braces. + return { strInBraces: fixedStart + (inBraces ? ('{' + strInBraces) : '') }; +} + +/** + * Parses out curly braces and returns equivalent globs. Only supports one level of nesting. + * Exported for testing. + */ +export function performBraceExpansionForRipgrep(pattern: string): string[] { + const { fixedStart, strInBraces, fixedEnd } = getEscapeAwareSplitStringForRipgrep(pattern); + if (fixedStart === undefined || fixedEnd === undefined) { + return [strInBraces]; + } + + let arr = splitGlobAware(strInBraces, ','); + + if (!arr.length) { + // occurs if the braces are empty. + arr = ['']; + } + + const ends = performBraceExpansionForRipgrep(fixedEnd); + + return arr.flatMap((elem) => { + const start = fixedStart + elem; + return ends.map((end) => { + return start + end; + }); + }); +} diff --git a/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngineUtils.test.ts b/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngineUtils.test.ts index 5090c39fc39fa9..5a06f03cee41d6 100644 --- a/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngineUtils.test.ts +++ b/src/vs/workbench/services/search/test/node/ripgrepTextSearchEngineUtils.test.ts @@ -6,8 +6,8 @@ import * as assert from 'assert'; import { joinPath } from 'vs/base/common/resources'; import { URI } from 'vs/base/common/uri'; -import { fixRegexNewline, IRgMatch, IRgMessage, RipgrepParser, unicodeEscapesToPCRE2, fixNewline } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine'; -import { Range, TextSearchResult } from 'vs/workbench/services/search/common/searchExtTypes'; +import { fixRegexNewline, IRgMatch, IRgMessage, RipgrepParser, unicodeEscapesToPCRE2, fixNewline, getRgArgs, performBraceExpansionForRipgrep } from 'vs/workbench/services/search/node/ripgrepTextSearchEngine'; +import { Range, TextSearchOptions, TextSearchQuery, TextSearchResult } from 'vs/workbench/services/search/common/searchExtTypes'; suite('RipgrepTextSearchEngine', () => { test('unicodeEscapesToPCRE2', async () => { @@ -296,4 +296,76 @@ suite('RipgrepTextSearchEngine', () => { ]); }); }); + + suite('getRgArgs', () => { + test('simple includes', () => { + // Only testing the args that come from includes. + function testGetRgArgs(includes: string[], expectedFromIncludes: string[]): void { + const query: TextSearchQuery = { + pattern: 'test' + }; + + const options: TextSearchOptions = { + includes: includes, + excludes: [], + maxResults: 1000, + useIgnoreFiles: false, + followSymlinks: false, + useGlobalIgnoreFiles: false, + useParentIgnoreFiles: false, + folder: URI.file('/some/folder') + }; + const expected = [ + '--hidden', + '--ignore-case', + ...expectedFromIncludes, + '--no-ignore', + '--crlf', + '--fixed-strings', + '--no-config', + '--no-ignore-global', + '--json', + '--', + 'test', + '.']; + const result = getRgArgs(query, options); + assert.deepStrictEqual(result, expected); + } + + ([ + [['a/*', 'b/*'], ['-g', '!*', '-g', '/a', '-g', '/a/*', '-g', '/b', '-g', '/b/*']], + [['**/a/*', 'b/*'], ['-g', '!*', '-g', '/b', '-g', '/b/*', '-g', '**/a/*']], + [['**/a/*', '**/b/*'], ['-g', '**/a/*', '-g', '**/b/*']], + [['foo/*bar/something/**'], ['-g', '!*', '-g', '/foo', '-g', '/foo/*bar', '-g', '/foo/*bar/something', '-g', '/foo/*bar/something/**']], + ].forEach(([includes, expectedFromIncludes]) => testGetRgArgs(includes, expectedFromIncludes))); + }); + }); + + test('brace expansion for ripgrep', () => { + function testBraceExpansion(argGlob: string, expectedGlob: string[]): void { + const result = performBraceExpansionForRipgrep(argGlob); + assert.deepStrictEqual(result, expectedGlob); + } + + [ + ['eep/{a,b}/test', ['eep/a/test', 'eep/b/test']], + ['eep/{a,b}/{c,d,e}', ['eep/a/c', 'eep/a/d', 'eep/a/e', 'eep/b/c', 'eep/b/d', 'eep/b/e']], + ['eep/{a,b}/\\{c,d,e}', ['eep/a/{c,d,e}', 'eep/b/{c,d,e}']], + ['eep/{a,b\\}/test', ['eep/{a,b}/test']], + ['eep/{a,b\\\\}/test', ['eep/a/test', 'eep/b\\\\/test']], + ['eep/{a,b\\\\\\}/test', ['eep/{a,b\\\\}/test']], + ['e\\{ep/{a,b}/test', ['e{ep/a/test', 'e{ep/b/test']], + ['eep/{a,\\b}/test', ['eep/a/test', 'eep/\\b/test']], + ['{a/*.*,b/*.*}', ['a/*.*', 'b/*.*']], + ['{{}', ['{{}']], + ['aa{{}', ['aa{{}']], + ['{b{}', ['{b{}']], + ['{{}c', ['{{}c']], + ['{{}}', ['{{}}']], + ['\\{{}}', ['{}']], + ['{}foo', ['foo']], + ['bar{ }foo', ['bar foo']], + ['{}', ['']], + ].forEach(([includePattern, expectedPatterns]) => testBraceExpansion(includePattern, expectedPatterns)); + }); });