Skip to content

Commit 031cdee

Browse files
committed
feat(init): add grep and glob local-op handlers
Add two new local-op types that let the server search project files without reading them all:

- grep: regex search across files with an optional glob filter, batched (multiple patterns in one round-trip), capped at 100 matches per search with 2000-char line truncation
- glob: find files by pattern, batched (multiple patterns in one round-trip), capped at 100 results

Uses ripgrep (rg) when available for speed and native binary file detection. Falls back to a Node.js fs-based implementation that skips node_modules, .git, and other non-source directories, with a per-file size cap matching MAX_FILE_BYTES (256KB).

Counterpart server-side schemas will be added in cli-init-api.

Made-with: Cursor
1 parent 74898e8 commit 031cdee

File tree

4 files changed

+610
-0
lines changed

4 files changed

+610
-0
lines changed

src/lib/init/local-ops.ts

Lines changed: 363 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ import type {
3232
DetectSentryPayload,
3333
DirEntry,
3434
FileExistsBatchPayload,
35+
GlobPayload,
36+
GrepPayload,
3537
ListDirPayload,
3638
LocalOpPayload,
3739
LocalOpResult,
@@ -313,6 +315,10 @@ export async function handleLocalOp(
313315
return await runCommands(payload, options.dryRun);
314316
case "apply-patchset":
315317
return await applyPatchset(payload, options.dryRun);
318+
case "grep":
319+
return await grep(payload);
320+
case "glob":
321+
return await glob(payload);
316322
case "create-sentry-project":
317323
return await createSentryProject(payload, options);
318324
case "detect-sentry":
@@ -846,6 +852,363 @@ async function detectSentry(
846852
};
847853
}
848854

855+
// ── Grep & Glob ─────────────────────────────────────────────────────
856+
857+
/** Default cap on the number of matches returned for one grep search. */
const MAX_GREP_RESULTS_PER_SEARCH = 100;

/** Matched lines longer than this many characters are cut and suffixed with "…". */
const MAX_GREP_LINE_LENGTH = 2000;

/** Default cap on the number of files returned for one glob pattern. */
const MAX_GLOB_RESULTS = 100;

/** Directory names the Node.js fallback walker never descends into. */
const SKIP_DIRS = new Set<string>([
  "node_modules",
  ".git",
  "__pycache__",
  ".venv",
  "venv",
  "dist",
  "build",
]);

/** One grep hit: the file path, the 1-based line number, and the (possibly truncated) line text. */
type GrepMatch = {
  path: string;
  lineNum: number;
  line: string;
};
871+
872+
// ── Ripgrep implementations (preferred when rg is on PATH) ──────────
873+
874+
/**
 * Spawn a command and collect its stdout and stderr as UTF-8 strings.
 *
 * Both streams are always drained so the child cannot block on a full pipe.
 * Stdout stops being retained once `MAX_OUTPUT_BYTES` has been reached (the
 * chunk that crosses the limit is still kept, so this is a soft cap); stderr
 * is retained for at most 64 chunks.
 *
 * @param cmd - Executable to run.
 * @param args - Arguments passed to the executable.
 * @param cwd - Working directory for the child process.
 * @returns The collected output and the numeric exit code.
 * @throws Rejects on spawn errors (e.g. ENOENT) and when the child is
 *   terminated by a signal, which includes the 30 s `timeout` kill. A signal
 *   kill must not be reported as exit code 1, because ripgrep callers treat
 *   exit code 1 as "no matches".
 */
function spawnCollect(
  cmd: string,
  args: string[],
  cwd: string
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args, {
      cwd,
      stdio: ["ignore", "pipe", "pipe"],
      timeout: 30_000,
    });

    const outChunks: Buffer[] = [];
    let outLen = 0;
    child.stdout.on("data", (chunk: Buffer) => {
      // Keep listening (and discarding) past the cap so the pipe stays drained.
      if (outLen < MAX_OUTPUT_BYTES) {
        outChunks.push(chunk);
        outLen += chunk.length;
      }
    });

    const errChunks: Buffer[] = [];
    child.stderr.on("data", (chunk: Buffer) => {
      if (errChunks.length < 64) {
        errChunks.push(chunk);
      }
    });

    child.on("error", (err) => {
      reject(err);
    });
    child.on("close", (code, signal) => {
      // `code` is null when the process was terminated by a signal rather
      // than exiting on its own (this is what the `timeout` option does).
      if (code === null) {
        reject(
          new Error(`${cmd} was terminated by signal ${signal ?? "unknown"}`)
        );
        return;
      }
      resolve({
        stdout: Buffer.concat(outChunks).toString("utf-8"),
        stderr: Buffer.concat(errChunks).toString("utf-8"),
        exitCode: code,
      });
    });
  });
}
918+
919+
/**
 * Parse ripgrep output that uses `|` as the field separator (set via
 * `--field-match-separator=|`) to avoid ambiguity with `:` in Windows
 * drive-letter paths.
 *
 * Expected line format: `filepath|linenum|matched text`
 *
 * Lines that do not have two separators, or whose line-number field is not
 * purely numeric, are skipped instead of producing a match with a bogus
 * `lineNum`. A trailing `\r` (from CRLF files) is removed from the text.
 *
 * @param cwd - Directory that returned paths are made relative to.
 * @param stdout - Raw ripgrep stdout.
 * @param maxResults - Maximum number of output lines to consider.
 * @returns Parsed matches, and `truncated: true` when stdout contained more
 *   non-empty lines than `maxResults`.
 */
function parseRgGrepOutput(
  cwd: string,
  stdout: string,
  maxResults: number
): { matches: GrepMatch[]; truncated: boolean } {
  const lines = stdout.split("\n").filter(Boolean);
  const truncated = lines.length > maxResults;
  const matches: GrepMatch[] = [];
  const digitsOnly = /^\d+$/;

  for (const rawLine of lines.slice(0, maxResults)) {
    // Lines from CRLF files keep their "\r"; drop it so it never reaches the
    // reported text.
    const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;

    const firstSep = line.indexOf("|");
    if (firstSep === -1) {
      continue;
    }
    const filePart = line.substring(0, firstSep);
    const rest = line.substring(firstSep + 1);
    const secondSep = rest.indexOf("|");
    if (secondSep === -1) {
      continue;
    }

    // A path that itself contains "|" shifts the fields; reject anything
    // that is not a plain decimal line number rather than emitting NaN.
    const lineNumText = rest.substring(0, secondSep);
    if (!digitsOnly.test(lineNumText)) {
      continue;
    }
    const lineNum = Number.parseInt(lineNumText, 10);

    let text = rest.substring(secondSep + 1);
    if (text.length > MAX_GREP_LINE_LENGTH) {
      text = `${text.substring(0, MAX_GREP_LINE_LENGTH)}…`;
    }
    matches.push({ path: path.relative(cwd, filePart), lineNum, line: text });
  }

  return { matches, truncated };
}
955+
956+
/**
 * Run one regex search with ripgrep and parse the result.
 *
 * Hidden files are searched (`--hidden`), but the `.git` directory is
 * excluded explicitly: `--hidden` would otherwise make ripgrep descend into
 * it, whereas the Node.js fallback skips `.git`.
 *
 * Exit-code handling: 1 is treated as "no matches"; 2 with empty stdout is
 * also treated as "no matches"; 2 with output is parsed as a partial result;
 * any other non-zero code throws.
 *
 * @param opts.cwd - Working directory for ripgrep and base for relative paths.
 * @param opts.pattern - Regex passed via `--regexp`.
 * @param opts.target - Path to search.
 * @param opts.include - Optional glob restricting which files are searched.
 * @param opts.maxResults - Maximum number of matches to return.
 * @throws If ripgrep cannot be spawned or exits with an unexpected code.
 */
async function rgGrepSearch(opts: {
  cwd: string;
  pattern: string;
  target: string;
  include: string | undefined;
  maxResults: number;
}): Promise<{ matches: GrepMatch[]; truncated: boolean }> {
  const { cwd, pattern, target, include, maxResults } = opts;
  const args = [
    "-nH",
    "--no-messages",
    "--hidden",
    "--field-match-separator=|",
    "--regexp",
    pattern,
  ];
  if (include) {
    args.push("--glob", include);
  }
  // Given after the include glob so the exclusion wins when both match.
  args.push("--glob", "!.git");
  args.push(target);

  const { stdout, exitCode } = await spawnCollect("rg", args, cwd);

  if (exitCode === 1 || (exitCode === 2 && !stdout.trim())) {
    return { matches: [], truncated: false };
  }
  if (exitCode !== 0 && exitCode !== 2) {
    throw new Error(`ripgrep failed with exit code ${exitCode}`);
  }

  return parseRgGrepOutput(cwd, stdout, maxResults);
}
988+
989+
/**
 * List files matching a glob using `rg --files`.
 *
 * Hidden files are included (`--hidden`), but the `.git` directory is
 * excluded explicitly: `--hidden` would otherwise make ripgrep list its
 * contents, whereas the Node.js fallback skips `.git`.
 *
 * Exit-code handling mirrors the grep variant: 1, or 2 with empty stdout,
 * yields an empty result; 2 with output is used as a partial result; any
 * other non-zero code throws.
 *
 * @param opts.cwd - Working directory for ripgrep and base for relative paths.
 * @param opts.pattern - Glob passed via `--glob`.
 * @param opts.target - Path to list files under.
 * @param opts.maxResults - Maximum number of files to return.
 * @throws If ripgrep cannot be spawned or exits with an unexpected code.
 */
async function rgGlobSearch(opts: {
  cwd: string;
  pattern: string;
  target: string;
  maxResults: number;
}): Promise<{ files: string[]; truncated: boolean }> {
  const { cwd, pattern, target, maxResults } = opts;
  const args = [
    "--files",
    "--hidden",
    "--glob",
    pattern,
    // Given after the caller's glob so the exclusion wins when both match.
    "--glob",
    "!.git",
    target,
  ];

  const { stdout, exitCode } = await spawnCollect("rg", args, cwd);

  if (exitCode === 1 || (exitCode === 2 && !stdout.trim())) {
    return { files: [], truncated: false };
  }
  if (exitCode !== 0 && exitCode !== 2) {
    throw new Error(`ripgrep failed with exit code ${exitCode}`);
  }

  const lines = stdout.split("\n").filter(Boolean);
  const truncated = lines.length > maxResults;
  const files = lines.slice(0, maxResults).map((f) => path.relative(cwd, f));
  return { files, truncated };
}
1012+
1013+
// ── Node.js fallback (when rg is not installed) ─────────────────────
1014+
1015+
/**
 * Recursively walk a directory, yielding file paths relative to `root`.
 *
 * Directories whose name is in `SKIP_DIRS` are not entered, and directories
 * that cannot be read are silently skipped. When `globPattern` is given,
 * only matching files are yielded: a pattern containing `/` is matched
 * against the relative path, any other pattern against the bare file name.
 *
 * The relative path is converted to `/` separators before matching so that
 * patterns such as `src/*.ts` also work where the platform separator is `\`.
 * Yielded paths keep the platform's native separators.
 *
 * @param root - Directory that yielded paths are relative to.
 * @param base - Directory currently being listed.
 * @param globPattern - Optional glob filter.
 */
async function* walkFiles(
  root: string,
  base: string,
  globPattern: string | undefined
): AsyncGenerator<string> {
  let entries: fs.Dirent[];
  try {
    entries = await fs.promises.readdir(base, { withFileTypes: true });
  } catch {
    // Unreadable directory: best-effort walk, just move on.
    return;
  }
  for (const entry of entries) {
    const full = path.join(base, entry.name);
    const rel = path.relative(root, full);
    if (entry.isDirectory() && !SKIP_DIRS.has(entry.name)) {
      yield* walkFiles(root, full, globPattern);
    } else if (entry.isFile()) {
      if (!globPattern) {
        yield rel;
        continue;
      }
      // Glob patterns always use "/", so normalise the native separator
      // before comparing (a no-op where the separator is already "/").
      const matchTarget = globPattern.includes("/")
        ? rel.split(path.sep).join("/")
        : entry.name;
      if (matchGlob(matchTarget, globPattern)) {
        yield rel;
      }
    }
  }
}
1043+
1044+
/**
 * Minimal glob matcher — supports `*`, `**`, and `?` wildcards.
 *
 * - `*` matches any run of characters except `/`.
 * - `?` matches exactly one character except `/`.
 * - `**` matches any run of characters, including `/`.
 * - A `**` followed by `/` that forms a whole path segment (at the start of
 *   the pattern or right after a `/`) matches zero or more directories, so
 *   `**` + `/*.ts` matches `foo.ts` as well as `src/foo.ts`.
 *
 * Every other character is matched literally.
 *
 * @param name - File name or `/`-separated relative path to test.
 * @param pattern - Glob pattern.
 * @returns Whether the whole of `name` matches `pattern`.
 */
function matchGlob(name: string, pattern: string): boolean {
  let source = "";
  let i = 0;

  while (i < pattern.length) {
    const ch = pattern.charAt(i);

    if (ch === "*" && pattern.charAt(i + 1) === "*") {
      const startsSegment = i === 0 || pattern.charAt(i - 1) === "/";
      if (startsSegment && pattern.charAt(i + 2) === "/") {
        // Whole-segment "**/": the directory prefix is optional.
        source += "(?:.*/)?";
        i += 3;
      } else {
        source += ".*";
        i += 2;
      }
    } else if (ch === "*") {
      source += "[^/]*";
      i += 1;
    } else if (ch === "?") {
      source += "[^/]";
      i += 1;
    } else {
      // Escape regex metacharacters so they match literally.
      source += ch.replace(/[.+^${}()|[\]\\]/g, "\\$&");
      i += 1;
    }
  }

  return new RegExp(`^${source}$`).test(name);
}
1054+
1055+
/**
 * Search files for a regex pattern using Node.js fs. Fallback for when
 * ripgrep is not available.
 *
 * Files are discovered with `walkFiles`. A file is skipped when it cannot be
 * read, when it is larger than `MAX_FILE_BYTES`, or when it contains a NUL
 * byte (treated as binary, so binary garbage is never returned as a match).
 * Lines are split on `\n` or `\r\n`, so CRLF files neither leak a trailing
 * `\r` into results nor defeat `$`-anchored patterns. Lines longer than
 * `MAX_GREP_LINE_LENGTH` are cut and suffixed with "…".
 *
 * @param opts.cwd - Project root; returned paths are relative to it.
 * @param opts.pattern - JavaScript regular expression source.
 * @param opts.searchPath - Optional sub-path to search, resolved via `safePath`.
 * @param opts.include - Optional glob restricting which files are searched.
 * @param opts.maxResults - Maximum number of matches to return.
 * @returns Up to `maxResults` matches, and `truncated: true` when more exist.
 * @throws If `pattern` is not a valid JavaScript regular expression.
 */
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: file-walking search with early exits
async function fsGrepSearch(opts: {
  cwd: string;
  pattern: string;
  searchPath: string | undefined;
  include: string | undefined;
  maxResults: number;
}): Promise<{ matches: GrepMatch[]; truncated: boolean }> {
  const { cwd, pattern, searchPath, include, maxResults } = opts;
  const target = searchPath ? safePath(cwd, searchPath) : cwd;
  const regex = new RegExp(pattern);
  const matches: GrepMatch[] = [];

  for await (const rel of walkFiles(cwd, target, include)) {
    // One match beyond the cap is collected so truncation can be detected.
    if (matches.length > maxResults) {
      break;
    }
    const absPath = path.join(cwd, rel);
    let content: string;
    try {
      const stat = await fs.promises.stat(absPath);
      if (stat.size > MAX_FILE_BYTES) {
        continue;
      }
      const raw = await fs.promises.readFile(absPath);
      // A NUL byte marks the file as binary; skip it.
      if (raw.includes(0)) {
        continue;
      }
      content = raw.toString("utf-8");
    } catch {
      // Unreadable file: best-effort search, just move on.
      continue;
    }
    const lines = content.split(/\r?\n/);
    for (let i = 0; i < lines.length; i += 1) {
      const line = lines[i] ?? "";
      if (regex.test(line)) {
        let text = line;
        if (text.length > MAX_GREP_LINE_LENGTH) {
          text = `${text.substring(0, MAX_GREP_LINE_LENGTH)}…`;
        }
        matches.push({ path: rel, lineNum: i + 1, line: text });
        if (matches.length > maxResults) {
          break;
        }
      }
    }
  }

  const truncated = matches.length > maxResults;
  if (truncated) {
    matches.length = maxResults;
  }
  return { matches, truncated };
}
1109+
1110+
/**
 * Find files matching a glob using the Node.js walker. Fallback for when
 * ripgrep is not available.
 *
 * Collects one entry more than `maxResults` so truncation can be detected,
 * then trims the list back to the limit.
 */
async function fsGlobSearch(opts: {
  cwd: string;
  pattern: string;
  searchPath: string | undefined;
  maxResults: number;
}): Promise<{ files: string[]; truncated: boolean }> {
  const root = opts.cwd;
  const limit = opts.maxResults;
  const startDir = opts.searchPath ? safePath(root, opts.searchPath) : root;

  const files: string[] = [];
  for await (const relPath of walkFiles(root, startDir, opts.pattern)) {
    files.push(relPath);
    if (files.length > limit) {
      break;
    }
  }

  if (files.length > limit) {
    files.length = limit;
    return { files, truncated: true };
  }
  return { files, truncated: false };
}
1133+
1134+
// ── Dispatch: try rg, fall back to Node.js ──────────────────────────
1135+
1136+
/**
 * Run one grep search, preferring ripgrep. If the ripgrep path throws for
 * any reason, the Node.js implementation is used instead.
 *
 * The search path is resolved through `safePath` before ripgrep is tried,
 * so an error from that resolution propagates rather than triggering the
 * fallback.
 */
async function grepSearch(opts: {
  cwd: string;
  pattern: string;
  searchPath: string | undefined;
  include: string | undefined;
  maxResults: number;
}): Promise<{ matches: GrepMatch[]; truncated: boolean }> {
  const { cwd, searchPath } = opts;

  let target = cwd;
  if (searchPath) {
    target = safePath(cwd, searchPath);
  }

  try {
    const viaRipgrep = await rgGrepSearch({ ...opts, target });
    return viaRipgrep;
  } catch {
    const viaNode = await fsGrepSearch(opts);
    return viaNode;
  }
}
1153+
1154+
/**
 * Run one glob search, preferring ripgrep. If the ripgrep path throws for
 * any reason, the Node.js implementation is used instead.
 *
 * The search path is resolved through `safePath` before ripgrep is tried,
 * so an error from that resolution propagates rather than triggering the
 * fallback.
 */
async function globSearchImpl(opts: {
  cwd: string;
  pattern: string;
  searchPath: string | undefined;
  maxResults: number;
}): Promise<{ files: string[]; truncated: boolean }> {
  const { cwd, searchPath } = opts;

  let target = cwd;
  if (searchPath) {
    target = safePath(cwd, searchPath);
  }

  try {
    const viaRipgrep = await rgGlobSearch({ ...opts, target });
    return viaRipgrep;
  } catch {
    const viaNode = await fsGlobSearch(opts);
    return viaNode;
  }
}
1170+
1171+
/**
 * Handle the `grep` local op: run every requested search concurrently and
 * return one result entry per search, in request order.
 *
 * The per-search match cap is `params.maxResultsPerSearch`, falling back to
 * `MAX_GREP_RESULTS_PER_SEARCH` when it is not provided.
 */
async function grep(payload: GrepPayload): Promise<LocalOpResult> {
  const { cwd, params } = payload;
  const maxResults = params.maxResultsPerSearch ?? MAX_GREP_RESULTS_PER_SEARCH;

  const pending = params.searches.map(async (search) => {
    const outcome = await grepSearch({
      cwd,
      pattern: search.pattern,
      searchPath: search.path,
      include: search.include,
      maxResults,
    });
    return {
      pattern: search.pattern,
      matches: outcome.matches,
      truncated: outcome.truncated,
    };
  });
  const results = await Promise.all(pending);

  return { ok: true, data: { results } };
}
1190+
1191+
/**
 * Handle the `glob` local op: resolve every requested pattern concurrently
 * and return one result entry per pattern, in request order.
 *
 * All patterns share `params.path` as the search root. The result cap is
 * `params.maxResults`, falling back to `MAX_GLOB_RESULTS` when it is not
 * provided.
 */
async function glob(payload: GlobPayload): Promise<LocalOpResult> {
  const { cwd, params } = payload;
  const maxResults = params.maxResults ?? MAX_GLOB_RESULTS;

  const pending = params.patterns.map(async (pattern) => {
    const outcome = await globSearchImpl({
      cwd,
      pattern,
      searchPath: params.path,
      maxResults,
    });
    return { pattern, files: outcome.files, truncated: outcome.truncated };
  });
  const results = await Promise.all(pending);

  return { ok: true, data: { results } };
}
1209+
1210+
// ── Sentry project + DSN ────────────────────────────────────────────
1211+
8491212
async function createSentryProject(
8501213
payload: CreateSentryProjectPayload,
8511214
options: WizardOptions

0 commit comments

Comments
 (0)