Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ src/
completions.ts # Pure shell-completion generators: generateCompletion(),
# detectShell(), getCompletionFilePath() — no I/O
group.ts # groupByTeamPrefix — team-prefix grouping logic
regex.ts # Pure query parser: isRegexQuery(), buildApiQuery()
# Detects /pattern/ syntax, derives safe API term,
# returns RegExp for local client-side filtering — no I/O
render.ts # Façade re-exporting sub-modules + top-level
# renderGroups() / renderHelpOverlay()
tui.ts # Interactive keyboard-driven UI (navigation, filter mode,
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ github-code-search query "useFeatureFlag" --org my-org --group-by-team-prefix pl

Get a team-scoped view of every usage site before refactoring a shared hook or utility.

**Regex search — pattern-based code audit**

```bash
github-code-search query "/from.*['\"\`]axios/" --org my-org
```

Use `/pattern/` syntax to run a regex search. The CLI automatically derives a safe API query term and filters results locally — no manual post-processing needed. Use `--regex-hint` to override the derived term when auto-extraction is too broad.

## Why not `gh search code`?

The official [`gh` CLI](https://cli.github.com/) does support `gh search code`, but it returns a **flat paginated list** — one result per line, no grouping, no interactive selection, no structured output.
Expand Down
7 changes: 7 additions & 0 deletions docs/.vitepress/theme/ComparisonTable.vue
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ const ROWS: Row[] = [
gcs: true,
docLink: "/usage/interactive-mode",
},
{
feature: "Regex queries (/pattern/flags)",
desc: "Use full regular expressions in queries — top-level alternation (A|B|C) maps to GitHub OR, client-side filtering applies the real pattern. GitHub supports regex in the web UI only, not in the REST API or gh CLI.",
gh: false,
gcs: true,
docLink: "/usage/search-syntax",
},
{
feature: "Pagination (up to 1\u202f000 results)",
desc: "Both tools auto-paginate the GitHub search API \u2014 up to 1\u202f000 results per query.",
Expand Down
8 changes: 8 additions & 0 deletions docs/.vitepress/theme/UseCaseTabs.vue
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ const USE_CASES: UseCase[] = [
"Get a team-scoped view of every usage site before refactoring a shared hook or utility. Essential for onboarding or large-scale refactors.",
command: `github-code-search query "useFeatureFlag" --org my-org --group-by-team-prefix platform/`,
},
{
id: "semver",
label: "Semver / version audit",
headline: "Which repos are pinned to a vulnerable minor version?",
description:
"Use regex syntax to target a precise version range — something a plain keyword search cannot do. Find every repo still locked to axios 1.x, react 17.x, or any other outdated pin, then export the list to a migration issue.",
command: `github-code-search query '/"axios": "1./' --org my-org`,
Comment thread
shouze marked this conversation as resolved.
Outdated
},
];

const active = ref(0);
Expand Down
7 changes: 6 additions & 1 deletion docs/architecture/components.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,21 @@ into a filtered, grouped, formatted output.
C4Component
title Level 3a: CLI data pipeline

UpdateLayoutConfig($c4ShapeInRow="4", $c4BoundaryInRow="1")
UpdateLayoutConfig($c4ShapeInRow="5", $c4BoundaryInRow="1")

Container(cli, "CLI parser", "github-code-search.ts", "Orchestrates filter,<br/>group, output and<br/>shell completions")

Container_Boundary(core, "Pure-function core — no I/O") {
Component(regexParser, "Query parser", "src/regex.ts", "isRegexQuery()<br/>buildApiQuery()")
Component(aggregate, "Filter & aggregation", "src/aggregate.ts", "aggregate()<br/>exclude repos & extracts")
Component(group, "Team grouping", "src/group.ts", "groupByTeamPrefix()<br/>flattenTeamSections()")
Component(outputFn, "Output formatter", "src/output.ts", "buildOutput()<br/>markdown or JSON")
Component(completions, "Shell completions", "src/completions.ts", "generateCompletion()<br/>detectShell()<br/>getCompletionFilePath()")
}

Rel(cli, regexParser, "Parse regex<br/>query")
UpdateRelStyle(cli, regexParser, $offsetX="35", $offsetY="-17")

Rel(cli, aggregate, "Filter<br/>CodeMatch[]")
UpdateRelStyle(cli, aggregate, $offsetX="0", $offsetY="-17")

Expand All @@ -38,6 +42,7 @@ C4Component
UpdateRelStyle(cli, completions, $offsetX="-90", $offsetY="-17")

UpdateElementStyle(cli, $bgColor="#FFCC33", $borderColor="#0000CC", $fontColor="#000000")
UpdateElementStyle(regexParser, $bgColor="#9933FF", $borderColor="#0000CC", $fontColor="#ffffff")
UpdateElementStyle(aggregate, $bgColor="#9933FF", $borderColor="#0000CC", $fontColor="#ffffff")
Comment thread
shouze marked this conversation as resolved.
Outdated
UpdateElementStyle(group, $bgColor="#9933FF", $borderColor="#0000CC", $fontColor="#ffffff")
UpdateElementStyle(outputFn, $bgColor="#9933FF", $borderColor="#0000CC", $fontColor="#ffffff")
Expand Down
23 changes: 12 additions & 11 deletions docs/reference/cli-options.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,18 @@ github-code-search completions [--shell <shell>]

## Search options

| Option | Type | Required | Default | Description |
| ----------------------------------- | --------------------------------- | -------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `--org <org>` | string | ✅ | — | GitHub organization to search in. Automatically injected as `org:<org>` in the query. |
| `--exclude-repositories <repos>` | string | ❌ | `""` | Comma-separated list of repositories to exclude. Short form (`repoA,repoB`) or full form (`org/repoA,org/repoB`) both accepted. |
| `--exclude-extracts <refs>` | string | ❌ | `""` | Comma-separated extract refs to exclude. Format: `repoName:path/to/file:index`. Short form (without org prefix) accepted. |
| `--no-interactive` | boolean (flag) | ❌ | `true` (on) | Disable interactive mode. Interactive mode is **on** by default; pass this flag to disable it. Also triggered by `CI=true`. |
| `--format <format>` | `markdown` \| `json` | ❌ | `markdown` | Output format. See [Output formats](/usage/output-formats). |
| `--output-type <type>` | `repo-and-matches` \| `repo-only` | ❌ | `repo-and-matches` | Controls output detail level. `repo-only` lists repository names only, without individual extracts. |
| `--include-archived` | boolean (flag) | ❌ | `false` | Include archived repositories in results (excluded by default). |
| `--group-by-team-prefix <prefixes>` | string | ❌ | `""` | Comma-separated team-name prefixes for grouping result repos by GitHub team (e.g. `squad-,chapter-`). Requires `read:org` scope. |
| `--no-cache` | boolean (flag) | ❌ | `true` (on) | Bypass the 24 h team-list cache and re-fetch teams from GitHub. Cache is **on** by default; pass this flag to disable it. Only applies with `--group-by-team-prefix`. |
| Option | Type | Required | Default | Description |
| ----------------------------------- | --------------------------------- | -------- | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `--org <org>` | string | ✅ | — | GitHub organization to search in. Automatically injected as `org:<org>` in the query. |
| `--exclude-repositories <repos>` | string | ❌ | `""` | Comma-separated list of repositories to exclude. Short form (`repoA,repoB`) or full form (`org/repoA,org/repoB`) both accepted. |
| `--exclude-extracts <refs>` | string | ❌ | `""` | Comma-separated extract refs to exclude. Format: `repoName:path/to/file:index`. Short form (without org prefix) accepted. |
| `--no-interactive` | boolean (flag) | ❌ | `true` (on) | Disable interactive mode. Interactive mode is **on** by default; pass this flag to disable it. Also triggered by `CI=true`. |
| `--format <format>` | `markdown` \| `json` | ❌ | `markdown` | Output format. See [Output formats](/usage/output-formats). |
| `--output-type <type>` | `repo-and-matches` \| `repo-only` | ❌ | `repo-and-matches` | Controls output detail level. `repo-only` lists repository names only, without individual extracts. |
| `--include-archived` | boolean (flag) | ❌ | `false` | Include archived repositories in results (excluded by default). |
| `--group-by-team-prefix <prefixes>` | string | ❌ | `""` | Comma-separated team-name prefixes for grouping result repos by GitHub team (e.g. `squad-,chapter-`). Requires `read:org` scope. |
| `--no-cache` | boolean (flag) | ❌ | `true` (on) | Bypass the 24 h team-list cache and re-fetch teams from GitHub. Cache is **on** by default; pass this flag to disable it. Only applies with `--group-by-team-prefix`. |
| `--regex-hint <term>` | string | ❌ | — | Override the API search term used when the query is a regex (`/pattern/`). Useful when auto-extraction produces a term that is too broad or too narrow. See [Regex queries](/usage/search-syntax#regex-queries). |

## Global options

Expand Down
48 changes: 48 additions & 0 deletions docs/usage/search-syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,54 @@ github-code-search "useFeatureFlag repo:fulll/billing-api repo:fulll/auth-servic
github-code-search "password= language:TypeScript NOT filename:test" --org fulll
```

## Regex queries

`github-code-search` supports regex syntax using the `/pattern/flags` notation, just like the GitHub web UI.

Because the GitHub Code Search API does not natively support regex, the CLI automatically extracts a representative literal term from the regex to send to the API, then filters the returned results locally with the full pattern. In most cases this is fully transparent.

```bash
# Imports using the axios module (any quote style)
github-code-search "/from.*['\"\`]axios/" --org fulll

# Axios dependency in package.json (any semver prefix)
github-code-search '/"axios": "[~^]?[0-9]/ filename:package.json' --org fulll
Comment thread
shouze marked this conversation as resolved.
Outdated
Comment thread
shouze marked this conversation as resolved.
Outdated

# Old library require() calls
github-code-search "/require\\(['\"](old-lib)['\"]\\)/" --org fulll

# Any of TODO, FIXME or HACK comments
github-code-search "/TODO|FIXME|HACK/" --org fulll
```

::: tip Top-level alternation
When the regex contains a **top-level `|`** (e.g. `TODO|FIXME|HACK`), the CLI sends
an `A OR B OR C` query to the GitHub API so that **all branches are covered** — no results are missed.
:::

### When auto-extraction is not precise enough

If the extracted term is very short (fewer than 3 characters), the CLI will exit with a warning and ask you to provide a manual hint:

```text
⚠ Regex mode — could not extract a term longer than 2 chars from /[~^]?[0-9]/
Provide a manual hint with --regex-hint <term>.
Comment thread
shouze marked this conversation as resolved.
Outdated
```
Comment thread
shouze marked this conversation as resolved.

Use `--regex-hint` to override the API search term while still applying the full regex filter locally:

```bash
github-code-search '/"axios":\s*"[~^]?[0-9]/ filename:package.json' \
--org fulll \
--regex-hint '"axios"'
```

::: warning API coverage
The GitHub Code Search API returns **at most 1,000 results** per query. The regex filter
is applied to those results; results beyond the API cap can never be seen. Refine the
query with qualifiers (`language:`, `path:`, `filename:`) to keep the result set small.
:::

## API limits

The GitHub Code Search API returns at most **1,000 results** per query. If your query returns more, refine it with qualifiers (especially `language:` or `path:`) to stay below the limit.
Expand Down
39 changes: 37 additions & 2 deletions github-code-search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import { groupByTeamPrefix, flattenTeamSections } from "./src/group.ts";
import { checkForUpdate } from "./src/upgrade.ts";
import { runInteractive } from "./src/tui.ts";
import { generateCompletion, detectShell } from "./src/completions.ts";
import { buildApiQuery, isRegexQuery } from "./src/regex.ts";
import type { OutputFormat, OutputType } from "./src/types.ts";

// Version + build metadata injected at compile time via --define (see build.ts).
Expand Down Expand Up @@ -179,6 +180,15 @@ function addSearchOptions(cmd: Command): Command {
.option(
"--no-cache",
"Bypass the 24 h team-list cache and re-fetch teams from GitHub (only applies with --group-by-team-prefix).",
)
.option(
"--regex-hint <term>",
[
"Override the search term sent to the GitHub API when using a regex query.",
"Useful when auto-extraction produces a term that is too broad or too narrow.",
'Example: --regex-hint "axios" (for query /from.*[\'"]axios/)',
"Docs: https://fulll.github.io/github-code-search/usage/search-syntax#regex-queries",
].join("\n"),
);
Comment thread
shouze marked this conversation as resolved.
Comment thread
shouze marked this conversation as resolved.
}

Expand All @@ -195,6 +205,7 @@ async function searchAction(
includeArchived: boolean;
groupByTeamPrefix: string;
cache: boolean;
regexHint?: string;
},
): Promise<void> {
// ─── GitHub API token ───────────────────────────────────────────────────────
Expand Down Expand Up @@ -264,8 +275,32 @@ async function searchAction(
return activeCooldown;
};

const rawMatches = await fetchAllResults(query, org, GITHUB_TOKEN!, onRateLimit);
let groups = aggregate(rawMatches, excludedRepos, excludedExtractRefs, includeArchived);
// ─── Regex query detection ───────────────────────────────────────────────
let effectiveQuery = query;
let regexFilter: RegExp | undefined;
if (isRegexQuery(query)) {
const { apiQuery, regexFilter: rf, warn } = buildApiQuery(query);
if (warn && !opts.regexHint) {
console.error(
pc.yellow(`⚠ Regex mode — ${warn}\n Provide a manual hint with --regex-hint <term>.`),
);
Comment thread
shouze marked this conversation as resolved.
Outdated
process.exit(1);
}
effectiveQuery = opts.regexHint ?? apiQuery;
regexFilter = rf ?? undefined;
Comment thread
shouze marked this conversation as resolved.
process.stderr.write(
pc.dim(` ℹ Regex mode — GitHub query: "${effectiveQuery}", local filter: ${query}\n`),
);
Comment thread
shouze marked this conversation as resolved.
}

const rawMatches = await fetchAllResults(effectiveQuery, org, GITHUB_TOKEN!, onRateLimit);
let groups = aggregate(
rawMatches,
excludedRepos,
excludedExtractRefs,
includeArchived,
regexFilter,
);

// ─── Team-prefix grouping ─────────────────────────────────────────────────
if (opts.groupByTeamPrefix) {
Expand Down
73 changes: 73 additions & 0 deletions src/aggregate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -238,4 +238,77 @@ describe("aggregate — regexFilter", () => {
aggregate(matches, new Set(), new Set(), false, regex);
expect(regex.lastIndex).toBe(savedIndex);
});

it("recomputes segments to point at the actual regex match (not the API literal)", () => {
// Simulate: regex /axios": "1\.12/, API literal "axios", API gives segment
// at [9,14] (pointing at "axios" only). After aggregation the segment must
Comment thread
shouze marked this conversation as resolved.
Outdated
// cover the full regex match.
//
// Fragment offsets: d e p s : \n " a x i o s " : " 1 . 1 2 . 0 "
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
// regex match: 'axios": "1.12' starts at offset 9, ends at 22
const fragment = 'deps:\n "axios": "1.12.0"';
const matches: CodeMatch[] = [
{
path: "package.json",
repoFullName: "myorg/repoA",
htmlUrl: "https://github.com/myorg/repoA/blob/main/package.json",
archived: false,
textMatches: [
{
fragment,
// API-provided segment: only covers "axios" at offset 9..14
matches: [{ text: "axios", indices: [9, 14], line: 2, col: 4 }],
},
],
},
];

const groups = aggregate(matches, new Set(), new Set(), false, /axios": "1\.12/);
expect(groups).toHaveLength(1);

const seg = groups[0].matches[0].textMatches[0].matches[0];
// The regex matches 'axios": "1.12' starting at offset 9 in the fragment
expect(seg.text).toBe('axios": "1.12');
expect(seg.indices[0]).toBe(9);
expect(seg.indices[1]).toBe(22);
expect(seg.line).toBe(2); // second line of the fragment
expect(seg.col).toBe(4); // after the leading ' "'
});

it("recomputes correct line and col for multiline fragments", () => {
const fragment = "line1\nline2\nfoo bar\nline4";
// 01234 5 67890 1 234567 8 9012
// ^ "foo" at offset 12 = line 3, col 1
const matches: CodeMatch[] = [makeMatchWithFragments("myorg/repoA", "src/a.ts", [fragment])];

const groups = aggregate(matches, new Set(), new Set(), false, /foo/);
const seg = groups[0].matches[0].textMatches[0].matches[0];
expect(seg.text).toBe("foo");
expect(seg.indices).toEqual([12, 15]);
expect(seg.line).toBe(3);
expect(seg.col).toBe(1);
});

it("filters out textMatches where the regex does not match, keeps those where it does", () => {
// One file with two textMatches: only the second one matches the regex.
const matches: CodeMatch[] = [
{
path: "src/a.ts",
repoFullName: "myorg/repoA",
htmlUrl: "",
archived: false,
textMatches: [
{ fragment: "unrelated code", matches: [] },
{ fragment: "import axios from 'axios'", matches: [] },
],
},
];

const groups = aggregate(matches, new Set(), new Set(), false, /axios/);
expect(groups).toHaveLength(1);
// Only the matching textMatch is kept
expect(groups[0].matches[0].textMatches).toHaveLength(1);
expect(groups[0].matches[0].textMatches[0].fragment).toBe("import axios from 'axios'");
});
});
Loading
Loading