Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/ci-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
# process.exit(). Running each file in its own process lets
# the OS reclaim all resources cleanly.
# pipeline — 12 files: ingestion pipeline + csv + 9 resolver tests
# e2e — 2 files: child-process only (spawnSync), no in-process lbug
# e2e — 4 files: child-process only (spawnSync), no in-process lbug
# standalone — 4 files: pure logic, no lbug, no child processes
test-matrix:
name: integration (${{ matrix.os }} / ${{ matrix.test-group }})
Expand Down Expand Up @@ -58,6 +58,7 @@ jobs:
test/integration/cli-e2e.test.ts
test/integration/hooks-e2e.test.ts
test/integration/skills-e2e.test.ts
test/integration/ignore-and-skip-e2e.test.ts
- test-group: standalone
test-glob: >-
test/integration/filesystem-walker.test.ts
Expand Down Expand Up @@ -152,6 +153,7 @@ jobs:
test/integration/enrichment.test.ts
test/integration/tree-sitter-languages.test.ts
test/integration/worker-pool.test.ts
test/integration/ignore-and-skip-e2e.test.ts
test/integration/resolvers/typescript.test.ts
test/integration/resolvers/csharp.test.ts
test/integration/resolvers/cpp.test.ts
Expand Down
42 changes: 33 additions & 9 deletions .github/workflows/claude-code-review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ on:
issue_comment:
types: [created]

# Serialize per-PR so concurrent @claude comments don't race on creating and
# deleting the temporary fork branch ref.
concurrency:
group: claude-review-${{ github.event.issue.number || github.event.pull_request.number }}
cancel-in-progress: false

jobs:
claude-review:
# Run only when:
Expand All @@ -41,7 +47,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
contents: write # needed to push fork branch to origin
contents: write # needed to create fork branch ref via API
pull-requests: write
issues: read
id-token: write
Expand Down Expand Up @@ -76,11 +82,25 @@ jobs:
fetch-depth: 1

# claude-code-action fetches branches by name from origin, which fails
# for fork PRs. Work around by pushing the fork branch to origin so
# the action can find it. Cleaned up in the post step below.
- name: Push fork branch to origin
# for fork PRs. Create a temporary branch ref via the API so the action
# can find it. Using the API (not git push) avoids the GITHUB_TOKEN
# restriction that blocks pushing commits containing workflow file changes.
- name: Create fork branch ref on origin
id: push-fork
if: steps.pr.outputs.is_fork == 'true'
run: git push origin HEAD:refs/heads/${{ steps.pr.outputs.branch }}
env:
FORK_BRANCH: ${{ steps.pr.outputs.branch }}
FORK_SHA: ${{ steps.pr.outputs.sha }}
GH_TOKEN: ${{ github.token }}
run: |
gh api "repos/${{ github.repository }}/git/refs" \
--method POST \
-f ref="refs/heads/$FORK_BRANCH" \
-f sha="$FORK_SHA" \
|| gh api "repos/${{ github.repository }}/git/refs/heads/$FORK_BRANCH" \
--method PATCH \
-f sha="$FORK_SHA" \
-F force=true

- name: Run Claude Code Review
id: claude-review
Expand All @@ -91,7 +111,11 @@ jobs:
plugins: 'code-review@claude-code-plugins'
prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ steps.pr.outputs.number }}'

# Clean up the temporary branch we pushed for fork PRs
- name: Delete fork branch from origin
if: always() && steps.pr.outputs.is_fork == 'true'
run: git push origin --delete refs/heads/${{ steps.pr.outputs.branch }} || true
# Clean up the temporary branch ref we created for fork PRs.
# Only delete if the create step actually succeeded.
- name: Delete fork branch ref from origin
if: always() && steps.push-fork.outcome == 'success'
env:
FORK_BRANCH: ${{ steps.pr.outputs.branch }}
GH_TOKEN: ${{ github.token }}
run: gh api "repos/${{ github.repository }}/git/refs/heads/$FORK_BRANCH" --method DELETE || true
77 changes: 75 additions & 2 deletions .github/workflows/claude.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ on:
pull_request_review:
types: [submitted]

# Serialize per-PR so concurrent @claude comments don't race on creating and
# deleting the temporary fork branch ref.
concurrency:
group: claude-code-${{ github.event.issue.number || github.event.pull_request.number || github.event.issue.id }}
cancel-in-progress: false

jobs:
claude:
if: |
Expand All @@ -20,17 +26,75 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
contents: read
contents: write # needed to create fork branch ref via API
pull-requests: write
issues: write
id-token: write
actions: read # Required for Claude to read CI results on PRs
actions: read # required for Claude to read CI results on PRs
steps:
# For PR-related triggers, resolve fork context so we can create a
# temporary branch ref (claude-code-action fetches by branch name).
- name: Resolve PR context
id: pr
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
with:
script: |
// Determine if this event is PR-related
let prNumber = null;
if (context.eventName === 'issue_comment' && context.payload.issue.pull_request) {
prNumber = context.payload.issue.number;
} else if (context.eventName === 'pull_request_review_comment') {
prNumber = context.payload.pull_request.number;
} else if (context.eventName === 'pull_request_review') {
prNumber = context.payload.pull_request.number;
}

if (!prNumber) {
core.setOutput('is_pr', 'false');
core.setOutput('is_fork', 'false');
return;
}

const resp = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: prNumber,
});
const pr = resp.data;
const isFork = pr.head.repo.full_name !== pr.base.repo.full_name;

core.setOutput('is_pr', 'true');
core.setOutput('is_fork', String(isFork));
core.setOutput('branch', pr.head.ref);
core.setOutput('sha', pr.head.sha);

- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
ref: ${{ steps.pr.outputs.is_fork == 'true' && steps.pr.outputs.sha || '' }}
fetch-depth: 1

# claude-code-action fetches branches by name from origin, which fails
# for fork PRs. Create a temporary branch ref via the API so the action
# can find it. Using the API (not git push) avoids the GITHUB_TOKEN
# restriction that blocks pushing commits containing workflow file changes.
- name: Create fork branch ref on origin
id: push-fork
if: steps.pr.outputs.is_fork == 'true'
env:
FORK_BRANCH: ${{ steps.pr.outputs.branch }}
FORK_SHA: ${{ steps.pr.outputs.sha }}
GH_TOKEN: ${{ github.token }}
run: |
gh api "repos/${{ github.repository }}/git/refs" \
--method POST \
-f ref="refs/heads/$FORK_BRANCH" \
-f sha="$FORK_SHA" \
|| gh api "repos/${{ github.repository }}/git/refs/heads/$FORK_BRANCH" \
--method PATCH \
-f sha="$FORK_SHA" \
-F force=true

- name: Run Claude Code
id: claude
uses: anthropics/claude-code-action@9469d113c6afd29550c402740f22d1a97dd1209b # v1
Expand All @@ -40,3 +104,12 @@ jobs:
# This is an optional setting that allows Claude to read CI results on PRs
additional_permissions: |
actions: read

# Clean up the temporary branch ref we created for fork PRs.
# Only delete if the create step actually succeeded.
- name: Delete fork branch ref from origin
if: always() && steps.push-fork.outcome == 'success'
env:
FORK_BRANCH: ${{ steps.pr.outputs.branch }}
GH_TOKEN: ${{ github.token }}
run: gh api "repos/${{ github.repository }}/git/refs/heads/$FORK_BRANCH" --method DELETE || true
10 changes: 10 additions & 0 deletions gitnexus/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions gitnexus/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
"graphology-indices": "^0.17.0",
"graphology-utils": "^2.3.0",
"@ladybugdb/core": "^0.15.1",
"ignore": "^7.0.5",
"lru-cache": "^11.0.0",
"mnemonist": "^0.39.0",
"pandemonium": "^2.4.0",
Expand Down
4 changes: 4 additions & 0 deletions gitnexus/src/cli/analyze.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ export const analyzeCommand = async (
return;
}

if (process.env.GITNEXUS_NO_GITIGNORE) {
console.log(' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n');
}

// Single progress bar for entire pipeline
const bar = new cliProgress.SingleBar({
format: ' {bar} {percentage}% | {phase}',
Expand Down
1 change: 1 addition & 0 deletions gitnexus/src/cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ program
.option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
.option('--skills', 'Generate repo-specific skill files from detected communities')
.option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)')
.addHelpText('after', '\nEnvironment variables:\n GITNEXUS_NO_GITIGNORE=1 Skip .gitignore parsing (still reads .gitnexusignore)')
.action(createLazyAction(() => import('./analyze.js'), 'analyzeCommand'));

program
Expand Down
92 changes: 92 additions & 0 deletions gitnexus/src/config/ignore-service.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import ignore, { type Ignore } from 'ignore';
import fs from 'fs/promises';
import nodePath from 'path';
import type { Path } from 'path-scurry';

const DEFAULT_IGNORE_LIST = new Set([
// Version Control
'.git',
Expand Down Expand Up @@ -186,6 +191,10 @@ const IGNORED_FILES = new Set([



// NOTE: Negation patterns in .gitnexusignore (e.g. `!vendor/`) cannot override
// entries in DEFAULT_IGNORE_LIST — this is intentional. The hardcoded list protects
// against indexing directories that are almost never source code (node_modules, .git, etc.).
// Users who need to include such directories should remove them from the hardcoded list.
export const shouldIgnorePath = (filePath: string): boolean => {
const normalizedPath = filePath.replace(/\\/g, '/');
const parts = normalizedPath.split('/');
Expand Down Expand Up @@ -237,3 +246,86 @@ export const shouldIgnorePath = (filePath: string): boolean => {
return false;
}

/**
 * Report whether `name` is a member of the hardcoded DEFAULT_IGNORE_LIST.
 *
 * The check is an exact Set lookup on the string as given: no path splitting
 * or normalization is performed here.
 *
 * @param name - The directory name to look up.
 * @returns `true` when `name` is present in DEFAULT_IGNORE_LIST, otherwise `false`.
 */
export const isHardcodedIgnoredDirectory = (name: string): boolean =>
  DEFAULT_IGNORE_LIST.has(name);

/**
 * Options accepted by `loadIgnoreRules` and `createIgnoreFilter`.
 */
export interface IgnoreOptions {
/**
 * When true, .gitignore is not read and only .gitnexusignore is consulted.
 * When omitted, `loadIgnoreRules` falls back to the truthiness of the
 * GITNEXUS_NO_GITIGNORE environment variable. An explicit `false` therefore
 * overrides the environment variable.
 */
noGitignore?: boolean;
}

/**
 * Read the repo-root ignore files and collect their patterns into a single
 * `ignore` instance.
 *
 * Files are read in a fixed order — `.gitignore` first (unless bypassed), then
 * `.gitnexusignore` — and each file's content is added in that order.
 * Any failure other than ENOENT is reported with `console.warn` and the file
 * is skipped; it never aborts the load.
 *
 * @param repoPath - Directory that is expected to contain the ignore files.
 * @param options - Optional overrides; `noGitignore` takes precedence over the
 *   GITNEXUS_NO_GITIGNORE environment variable when it is not null/undefined.
 * @returns The populated `ignore` instance, or `null` when no file was loaded.
 */
export const loadIgnoreRules = async (
  repoPath: string,
  options?: IgnoreOptions
): Promise<Ignore | null> => {
  // Lets users bypass .gitignore (e.g. when it accidentally excludes source files).
  const bypassGitignore = options?.noGitignore ?? Boolean(process.env.GITNEXUS_NO_GITIGNORE);

  // .gitnexusignore is always consulted and always comes last.
  const candidates = ['.gitnexusignore'];
  if (!bypassGitignore) {
    candidates.unshift('.gitignore');
  }

  const rules = ignore();
  let loadedCount = 0;

  for (const candidate of candidates) {
    try {
      const text = await fs.readFile(nodePath.join(repoPath, candidate), 'utf-8');
      rules.add(text);
      loadedCount += 1;
    } catch (err: unknown) {
      // A missing file is the normal case and stays silent.
      const missing = (err as NodeJS.ErrnoException).code === 'ENOENT';
      if (!missing) {
        console.warn(` Warning: could not read ${candidate}: ${(err as Error).message}`);
      }
    }
  }

  return loadedCount > 0 ? rules : null;
};

/**
 * Create a glob-compatible ignore filter combining:
 * - .gitignore / .gitnexusignore patterns (via the `ignore` package)
 * - the hardcoded DEFAULT_IGNORE_LIST and the rules applied by `shouldIgnorePath`
 *
 * The returned object has the `ignored` / `childrenIgnored` shape that glob
 * accepts for its `ignore` option, which lets glob prune whole directories
 * during traversal instead of filtering files afterwards.
 *
 * @param repoPath - Repository root passed through to `loadIgnoreRules`.
 * @param options - Optional overrides forwarded to `loadIgnoreRules`.
 * @returns An object with `ignored(p)` and `childrenIgnored(p)` predicates.
 */
export const createIgnoreFilter = async (repoPath: string, options?: IgnoreOptions) => {
  const ig = await loadIgnoreRules(repoPath, options);

  return {
    ignored(p: Path): boolean {
      // Use relativePosix(): it always yields '/'-separated paths, whereas
      // relative() uses the platform separator (backslashes on Windows).
      // Pattern matching must not depend on the host separator.
      const rel = p.relativePosix();
      // An empty relative path is the walk root itself; never ignore it.
      if (!rel) return false;
      // Check .gitignore / .gitnexusignore patterns
      if (ig && ig.ignores(rel)) return true;
      // Fall back to hardcoded rules
      return shouldIgnorePath(rel);
    },
    childrenIgnored(p: Path): boolean {
      // Fast path: check directory name against hardcoded list.
      // Note: dot-directories (.git, .vscode, etc.) are primarily excluded by
      // glob's `dot: false` option in filesystem-walker.ts. This check is
      // defense-in-depth — do not remove `dot: false` assuming this covers it.
      if (DEFAULT_IGNORE_LIST.has(p.name)) return true;
      // Check against .gitignore / .gitnexusignore patterns.
      // Test both bare path and path with trailing slash to handle
      // bare-name patterns (e.g. `local`) and dir-only patterns (e.g. `local/`).
      if (ig) {
        const rel = p.relativePosix();
        if (rel && (ig.ignores(rel) || ig.ignores(rel + '/'))) return true;
      }
      return false;
    },
  };
};

9 changes: 5 additions & 4 deletions gitnexus/src/core/ingestion/filesystem-walker.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fs from 'fs/promises';
import path from 'path';
import { glob } from 'glob';
import { shouldIgnorePath } from '../../config/ignore-service.js';
import { createIgnoreFilter } from '../../config/ignore-service.js';

export interface FileEntry {
path: string;
Expand Down Expand Up @@ -32,13 +32,14 @@ export const walkRepositoryPaths = async (
repoPath: string,
onProgress?: (current: number, total: number, filePath: string) => void
): Promise<ScannedFile[]> => {
const files = await glob('**/*', {
const ignoreFilter = await createIgnoreFilter(repoPath);

const filtered = await glob('**/*', {
cwd: repoPath,
nodir: true,
dot: false,
ignore: ignoreFilter,
});

const filtered = files.filter(file => !shouldIgnorePath(file));
const entries: ScannedFile[] = [];
let processed = 0;
let skippedLarge = 0;
Expand Down
Loading
Loading