From 52c8b529b86696978e1d43f438ed488af3982af9 Mon Sep 17 00:00:00 2001 From: LorenzoFeng Date: Sat, 23 May 2026 16:23:56 +0800 Subject: [PATCH 1/2] fix(watcher): use chokidar with .gitignore filtering to prevent inotify exhaustion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace raw fs.watch({recursive:true}) with chokidar, which supports an 'ignored' callback that filters directories BEFORE registering inotify watches. Previously, fs.watch registered a watch on every directory under the project root (including node_modules/, .git/, dist/, .next/, etc.) and only filtered in the callback — wasting kernel watch budget on directories whose events were discarded. The chokidar ignored callback loads .gitignore rules from the project root upward (using the existing 'ignore' package dependency) and also hardcodes exclusion of .codegraph/ and .git/. This drops per-instance watch count from hundreds of thousands to hundreds on monorepos. Closes #276 --- package-lock.json | 34 +++++++++- package.json | 1 + src/sync/watcher.ts | 158 +++++++++++++++++++++++++++++++++++--------- 3 files changed, 158 insertions(+), 35 deletions(-) diff --git a/package-lock.json b/package-lock.json index 36c592b15..d82b716a4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,15 +1,16 @@ { "name": "@colbymchenry/codegraph", - "version": "0.9.3", + "version": "0.9.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@colbymchenry/codegraph", - "version": "0.9.3", + "version": "0.9.4", "license": "MIT", "dependencies": { "@clack/prompts": "^1.3.0", + "chokidar": "^4.0.3", "commander": "^14.0.2", "fast-string-width": "^3.0.2", "fast-wrap-ansi": "^0.2.0", @@ -1004,6 +1005,21 @@ "node": ">= 16" } }, + "node_modules/chokidar": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz", + "integrity": 
"sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==", + "license": "MIT", + "dependencies": { + "readdirp": "^4.0.1" + }, + "engines": { + "node": ">= 14.16.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + } + }, "node_modules/commander": { "version": "14.0.3", "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.3.tgz", @@ -1269,6 +1285,19 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/readdirp": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz", + "integrity": "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==", + "license": "MIT", + "engines": { + "node": ">= 14.18.0" + }, + "funding": { + "type": "individual", + "url": "https://paulmillr.com/funding/" + } + }, "node_modules/rollup": { "version": "4.57.1", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.57.1.tgz", @@ -1431,7 +1460,6 @@ "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.21.3", "postcss": "^8.4.43", diff --git a/package.json b/package.json index 5455ced92..beb25d9ab 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "license": "MIT", "dependencies": { "@clack/prompts": "^1.3.0", + "chokidar": "^4.0.3", "commander": "^14.0.2", "fast-string-width": "^3.0.2", "fast-wrap-ansi": "^0.2.0", diff --git a/src/sync/watcher.ts b/src/sync/watcher.ts index 68e60fffb..aea311a85 100644 --- a/src/sync/watcher.ts +++ b/src/sync/watcher.ts @@ -4,11 +4,16 @@ * Watches the project directory for file changes and triggers * debounced sync operations to keep the code graph up-to-date. * - * Uses Node.js native fs.watch with recursive mode (macOS FSEvents, - * Windows ReadDirectoryChangesW, Linux inotify on Node 19+). 
+ * Uses chokidar under the hood, which provides cross-platform file + * watching with built-in filtering to avoid registering unnecessary + * inotify watches (fixes #276: fs.watch recursive exhausts kernel + * watch budget on large repos). */ import * as fs from 'fs'; +import * as path from 'path'; +import chokidar, { FSWatcher } from 'chokidar'; +import ignore, { Ignore } from 'ignore'; import { isSourceFile } from '../extraction'; import { logDebug, logWarn } from '../errors'; import { normalizePath } from '../utils'; @@ -36,22 +41,64 @@ export interface WatchOptions { onSyncError?: (error: Error) => void; } +/** + * Represents a .gitignore file loaded from a specific directory. + * Rules in a .gitignore are relative to that directory, mirroring + * how git applies .gitignore files at every level. + */ +interface ScopedIgnore { + dir: string; + ig: Ignore; +} + +/** + * Load .gitignore files from projectRoot upward through parent + * directories. Returns a list ordered from root to projectRoot + * so nested rules (closest to the project) are checked first. + */ +function loadGitignoreChain(projectRoot: string): ScopedIgnore[] { + const matchers: ScopedIgnore[] = []; + let dir = projectRoot; + + // Determine the filesystem root (e.g. '/' on Linux) + const root = path.parse(dir).root; + + while (dir !== root) { + const giPath = path.join(dir, '.gitignore'); + try { + if (fs.existsSync(giPath)) { + matchers.unshift({ + dir, + ig: ignore().add(fs.readFileSync(giPath, 'utf-8')), + }); + } + } catch { + // Unreadable .gitignore — treat as absent + } + dir = path.dirname(dir); + } + + return matchers; +} + /** * FileWatcher monitors a project directory for changes and triggers * debounced sync operations via a provided callback. 
* * Design goals: - * - Minimal resource usage (native OS file events, no polling) + * - Minimal resource usage (chokidar with .gitignore-aware filtering + * avoids registering inotify watches on excluded directories) * - Debounced to avoid thrashing on rapid saves * - Filters to supported source files by extension * - Ignores .codegraph/ directory changes */ export class FileWatcher { - private watcher: fs.FSWatcher | null = null; + private watcher: FSWatcher | null = null; private debounceTimer: ReturnType<typeof setTimeout> | null = null; private hasChanges = false; private syncing = false; private stopped = false; + private gitignoreMatchers: ScopedIgnore[] = []; private readonly projectRoot: string; private readonly debounceMs: number; @@ -79,57 +126,103 @@ export class FileWatcher { if (this.watcher) return true; // Already watching this.stopped = false; - // Some environments make recursive fs.watch unusable — most notably WSL2 - // /mnt/ drives, where setup blocks long enough to break MCP startup - // handshakes (issue #199). Skip watching there; callers fall back to - // manual `codegraph sync` or the git sync hooks. + // Some environments make filesystem watching unusable — most notably + // WSL2 /mnt/ drives, where the underlying fs.watch calls block long + // enough to break MCP startup handshakes (issue #199). Skip watching + // there; callers fall back to manual `codegraph sync` or git sync hooks. const disabledReason = watchDisabledReason(this.projectRoot); if (disabledReason) { logDebug('File watcher disabled', { reason: disabledReason, projectRoot: this.projectRoot }); return false; } - try { - this.watcher = fs.watch( - this.projectRoot, - { recursive: true }, - (_eventType, filename) => { - if (!filename || this.stopped) return; + // Load .gitignore rules from project root upward. 
+ // These drive chokidar's `ignored` callback so we never register + // inotify watches on excluded directories (like node_modules/, .git/, + // dist/, .next/, etc.), avoiding kernel watch-budget exhaustion (#276). + this.gitignoreMatchers = loadGitignoreChain(this.projectRoot); - // Normalize path separators - const normalized = normalizePath(filename); + try { + this.watcher = chokidar.watch(this.projectRoot, { + // Core fix for #276: filter directories BEFORE they are watched. + // chokidar calls this for every file and directory it encounters, + // and only registers an underlying fs.watch on those that pass. + // This drops per-instance inotify watch count from hundreds of + // thousands (on a monorepo) to hundreds — only the directories + // that actually contain tracked source code. + ignored: (testPath: string) => { + const rel = normalizePath(path.relative(this.projectRoot, testPath)); - // Ignore .codegraph/ directory changes (our own DB writes) + // Always ignore .codegraph/ (our own DB writes) and .git/ if ( - normalized === '.codegraph' || - normalized.startsWith('.codegraph/') || - normalized.startsWith('.codegraph\\') + rel === '.codegraph' || + rel.startsWith('.codegraph/') || + rel === '.git' || + rel.startsWith('.git/') ) { - return; + return true; } - // Only sync changes to files we can actually parse. - if (!isSourceFile(normalized)) { - return; + // Check .gitignore rules + for (const { dir, ig } of this.gitignoreMatchers) { + let matcherRel = normalizePath(path.relative(dir, testPath)); + if (!matcherRel || matcherRel.startsWith('..')) continue; + + // For directory-only .gitignore rules (e.g. "build/"), + // append a trailing slash so the ignore package matches them. 
+ try { + const stat = fs.statSync(testPath); + if (stat.isDirectory()) matcherRel += '/'; + } catch { + // If we can't stat, assume it's a file — don't append '/' + } + + if (ig.ignores(matcherRel)) return true; } - logDebug('File change detected', { file: normalized }); - this.hasChanges = true; - this.scheduleSync(); + return false; + }, + }); + + // Wire up the file-change handler. chokidar emits 'all' for every + // event type; we only care about files that were actually changed. + this.watcher.on('all', (_event: string, filePath: string) => { + if (this.stopped) return; + + const normalized = normalizePath(path.relative(this.projectRoot, filePath)); + + // Defense in depth: filter again even though `ignored` should + // have prevented watches on these directories. Events can still + // arrive during watcher setup or from symlink traversal. + if ( + normalized === '.codegraph' || + normalized.startsWith('.codegraph/') || + normalized === '.git' || + normalized.startsWith('.git/') + ) { + return; + } + + // Only sync changes to files we can actually parse. 
+ if (!isSourceFile(normalized)) { + return; } - ); + + logDebug('File change detected', { file: normalized }); + this.hasChanges = true; + this.scheduleSync(); + }); // Handle watcher errors gracefully - this.watcher.on('error', (err) => { + this.watcher.on('error', (err: unknown) => { logWarn('File watcher error', { error: String(err) }); - // Don't crash — watcher may recover or user can restart }); logDebug('File watcher started', { projectRoot: this.projectRoot, debounceMs: this.debounceMs }); return true; } catch (err) { - // Recursive watch not supported (e.g., Linux < Node 19) - logWarn('Could not start file watcher — recursive fs.watch not supported on this platform', { error: String(err) }); + // Watcher setup failed (e.g., permission denied, missing directory) + logWarn('Could not start file watcher', { error: String(err) }); return false; } } @@ -151,6 +244,7 @@ export class FileWatcher { } this.hasChanges = false; + this.gitignoreMatchers = []; logDebug('File watcher stopped'); } From acc64a7cc2a3dd374976d8e294263efe701e3b75 Mon Sep 17 00:00:00 2001 From: Colby McHenry Date: Mon, 25 May 2026 20:12:02 -0500 Subject: [PATCH 2/2] fix(watcher): reuse indexer ignore set; harden daemon lock for concurrent startup (#276) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds on 7emotions' chokidar watcher (#346): - Feed chokidar's `ignored` from the indexer's own `buildDefaultIgnore` (built-in default-ignore dirs + the project .gitignore) instead of a .gitignore-only chain — so node_modules/build/cache dirs are excluded even when the repo has no .gitignore (the #407 default-ignore gap), and the watcher can never diverge from index scope. Drop the per-path statSync (use chokidar's passed stats). Measured: ~1200 -> ~14 inotify watches on a 900-dir node_modules, with no .gitignore. - Add a behavioral test that node_modules edits don't trigger a sync while source edits do. 
Also hardens the #411 daemon lock against a concurrent-startup race the new watcher's timing made reproducible: the lockfile is now created atomically WITH its content via temp-write + hard-link (link() is atomic and exclusive), eliminating the empty-file window where a racing candidate could read an empty lock and unlink the winner's — which produced two daemons (two watchers, two writers). Stress-tested 8x on macOS and 5x on Linux; was failing ~2/3 before. Validated on macOS, Linux (Docker), and Windows (chokidar + fs.linkSync work on NTFS): full suite green; watcher excludes node_modules with no .gitignore; daemon concurrent-startup converges on one daemon. Co-Authored-By: Colby McHenry Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 14 +++ __tests__/watcher.test.ts | 30 +++++++ package-lock.json | 1 + src/extraction/index.ts | 2 +- src/mcp/daemon.ts | 74 ++++++++-------- src/sync/watcher.ts | 178 +++++++++++++------------------------- 6 files changed, 147 insertions(+), 152 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cf4b58e1..1d1192883 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,20 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). already attached to the old daemon keep using it while new sessions run standalone until it idles out — they never mix versions over the socket. +### Fixed +- **The file watcher no longer exhausts the OS file-watch budget on large + repos (#276).** It used to register a recursive watch over the *entire* + project — `node_modules/`, build output, caches and all — and filter only + after the fact. On Linux that meant hundreds of thousands of inotify watches + per project; enough that a second project, or codegraph alongside your editor + / `next dev`, could hit the per-user ceiling and fail with "OS file watch + limit reached." 
The watcher now excludes the same directories the indexer + ignores (the built-in default-ignore set **plus** your `.gitignore`) *before* + registering a watch — so on a repo with a 900-directory `node_modules` the + watch count drops from ~1,200 to ~14, even when the project has no + `.gitignore`. (Stacks with the shared daemon from #411: one watcher across + agents, and now that watcher is small.) + ## [0.9.5] - 2026-05-25 ### Fixed diff --git a/__tests__/watcher.test.ts b/__tests__/watcher.test.ts index fde5f5935..ac796007f 100644 --- a/__tests__/watcher.test.ts +++ b/__tests__/watcher.test.ts @@ -166,6 +166,36 @@ describe('FileWatcher', () => { watcher.stop(); }); + + it('should not watch node_modules even without a .gitignore (#276/#417)', async () => { + // No .gitignore in testDir — exclusion relies on the built-in + // default-ignore set the indexer uses (buildDefaultIgnore), which a + // .gitignore-only filter would miss. + fs.mkdirSync(path.join(testDir, 'node_modules', 'dep', 'lib'), { recursive: true }); + fs.writeFileSync(path.join(testDir, 'node_modules', 'dep', 'index.ts'), 'export const dep = 1;'); + + const syncFn = vi.fn().mockResolvedValue({ filesChanged: 0, durationMs: 0 }); + const watcher = new FileWatcher(testDir, syncFn, { debounceMs: 200 }); + watcher.start(); + + // Let the watcher settle past any residual crawl events. + await new Promise((r) => setTimeout(r, 400)); + syncFn.mockClear(); + + // A source-extension edit INSIDE node_modules must NOT trigger a sync — + // the directory was never watched. + fs.writeFileSync(path.join(testDir, 'node_modules', 'dep', 'lib', 'extra.ts'), 'export const e = 2;'); + await new Promise((r) => setTimeout(r, 600)); + expect(syncFn).not.toHaveBeenCalled(); + + // Positive control: a real source edit still triggers sync, proving the + // watcher is live (not merely inert). 
+ fs.writeFileSync(path.join(testDir, 'src', 'live.ts'), 'export const live = 3;'); + await waitFor(() => syncFn.mock.calls.length > 0, 5000); + expect(syncFn).toHaveBeenCalled(); + + watcher.stop(); + }); }); describe('callbacks', () => { diff --git a/package-lock.json b/package-lock.json index d82b716a4..f186ddcf4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1460,6 +1460,7 @@ "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.21.3", "postcss": "^8.4.43", diff --git a/src/extraction/index.ts b/src/extraction/index.ts index 52a63d275..8332c6c3a 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -166,7 +166,7 @@ const DEFAULT_IGNORE_PATTERNS: string[] = [ * the defaults apply to tracked files too (committing a dependency dir doesn't make * it project code; the explicit `.gitignore` negation is the only opt-in). */ -function buildDefaultIgnore(rootDir: string): Ignore { +export function buildDefaultIgnore(rootDir: string): Ignore { const ig = ignore().add(DEFAULT_IGNORE_PATTERNS); try { const rootGitignore = path.join(rootDir, '.gitignore'); diff --git a/src/mcp/daemon.ts b/src/mcp/daemon.ts index a9b5eaa69..ffd262476 100644 --- a/src/mcp/daemon.ts +++ b/src/mcp/daemon.ts @@ -280,20 +280,24 @@ export type AcquireResult = | { kind: 'taken'; existing: DaemonLockInfo | null; pidPath: string }; /** - * Atomically create the daemon pidfile AND write its full record in the same - * call. Returns either an `acquired` result (the caller is now the daemon-elect - * and may construct a {@link Daemon}) or a `taken` result. + * Atomically create the daemon pidfile with its full record already in place. + * Returns either an `acquired` result (the caller is the daemon-elect and may + * construct a {@link Daemon}) or a `taken` result. 
* - * must-fix 1 (issue #411 review): the original implementation created the - * pidfile empty under an `O_EXCL` fd and only wrote the body later, after - * `server.listen` resolved. A second candidate that read the pidfile during - * that millisecond-wide window saw an empty file, decoded it as `null`, treated - * it as stale, and `unlink`'d the lock the first daemon still held — producing - * two daemons (two watchers, two writers) on concurrent startup, exactly the - * multi-agent scenario the feature targets. Writing the complete record before - * returning the handle closes that window: a concurrent reader always sees a - * valid pid+version+socketPath, never an empty file. The socket path is - * deterministic from the project root, so it's known here. + * must-fix 1 (issue #411 review): the lockfile must appear in ONE atomic step, + * already complete — never empty, even momentarily. The first attempt at this + * (`O_EXCL` create then a separate `writeSync`) left a microsecond window where + * the file existed but was empty; under concurrent daemon startup a third + * candidate could read that empty file, decode it as `null`, and `unlink` the + * winner's lock → two daemons (two watchers, two writers). The window was + * normally too small to hit, but the chokidar watcher's extra startup time made + * concurrent daemons overlap enough to reproduce it reliably. + * + * The fix writes the complete record to a private temp file, then hard-links it + * into place: `link()` is atomic AND exclusive (EEXIST if the target exists), so + * the pidfile becomes visible in one step already containing a full record. + * Whoever links first wins; everyone else gets EEXIST and reads a complete file. + * There is no empty-file window at all. 
*/ export function tryAcquireDaemonLock(projectRoot: string): AcquireResult { const pidPath = getDaemonPidPath(projectRoot); @@ -301,34 +305,36 @@ export function tryAcquireDaemonLock(projectRoot: string): AcquireResult { // thing to touch it on a fresh-clone-but-already-initialized checkout. fs.mkdirSync(path.dirname(pidPath), { recursive: true }); + const info: DaemonLockInfo = { + pid: process.pid, + version: CodeGraphPackageVersion, + socketPath: getDaemonSocketPath(projectRoot), + startedAt: Date.now(), + }; + + // Temp name is pid-scoped so racing candidates never collide on it. + const tmp = `${pidPath}.${process.pid}.tmp`; + let acquired = false; try { - // `wx` = O_CREAT | O_EXCL | O_WRONLY: atomic "create only if absent". - const fd = fs.openSync(pidPath, 'wx', 0o600); - const info: DaemonLockInfo = { - pid: process.pid, - version: CodeGraphPackageVersion, - socketPath: getDaemonSocketPath(projectRoot), - startedAt: Date.now(), - }; + fs.writeFileSync(tmp, encodeLockInfo(info), { mode: 0o600 }); try { - // Synchronous write immediately after the create — no await in between — - // so the empty-file window is a single fs.writeSync, not an I/O-bound - // `server.listen`. Combined with the pid-verified `clearStaleDaemonLock` - // below, concurrent candidates can never delete a live daemon's lock. - fs.writeSync(fd, encodeLockInfo(info)); - } finally { - fs.closeSync(fd); + fs.linkSync(tmp, pidPath); // atomic + exclusive + acquired = true; + } catch (err: unknown) { + if ((err as NodeJS.ErrnoException).code !== 'EEXIST') throw err; } - return { kind: 'acquired', pidPath, info }; - } catch (err: unknown) { - const e = err as NodeJS.ErrnoException; - if (e.code !== 'EEXIST') throw err; + } finally { + try { fs.unlinkSync(tmp); } catch { /* temp already gone */ } } + if (acquired) return { kind: 'acquired', pidPath, info }; + + // Taken. 
Because the pidfile was link'd atomically it always holds a complete + // record — `existing` is null only for a genuinely corrupt leftover, never a + // mid-write race. let existing: DaemonLockInfo | null = null; try { - const raw = fs.readFileSync(pidPath, 'utf8'); - existing = decodeLockInfo(raw); + existing = decodeLockInfo(fs.readFileSync(pidPath, 'utf8')); } catch { /* unreadable lockfile — treat as malformed */ } return { kind: 'taken', existing, pidPath }; } diff --git a/src/sync/watcher.ts b/src/sync/watcher.ts index aea311a85..692cb93e2 100644 --- a/src/sync/watcher.ts +++ b/src/sync/watcher.ts @@ -1,20 +1,24 @@ /** * File Watcher * - * Watches the project directory for file changes and triggers - * debounced sync operations to keep the code graph up-to-date. + * Watches the project directory for file changes and triggers debounced sync + * operations to keep the code graph up-to-date. * - * Uses chokidar under the hood, which provides cross-platform file - * watching with built-in filtering to avoid registering unnecessary - * inotify watches (fixes #276: fs.watch recursive exhausts kernel - * watch budget on large repos). + * Uses chokidar, whose `ignored` callback filters directories BEFORE they are + * watched — so we never register inotify watches on excluded trees like + * node_modules/, dist/, .git/ (fixes #276: recursive fs.watch exhausted the + * kernel watch budget on large repos). The ignore decision reuses the indexer's + * `buildDefaultIgnore` (built-in default-ignore dirs + the project's .gitignore) + * so the watcher watches exactly the set the indexer indexes — in particular, + * node_modules/build/cache dirs are excluded even when the repo has no + * .gitignore (#407), which a .gitignore-only filter would miss. 
*/ -import * as fs from 'fs'; import * as path from 'path'; +import type { Stats } from 'fs'; import chokidar, { FSWatcher } from 'chokidar'; -import ignore, { Ignore } from 'ignore'; -import { isSourceFile } from '../extraction'; +import type { Ignore } from 'ignore'; +import { isSourceFile, buildDefaultIgnore } from '../extraction'; import { logDebug, logWarn } from '../errors'; import { normalizePath } from '../utils'; import { watchDisabledReason } from './watch-policy'; @@ -41,56 +45,16 @@ export interface WatchOptions { onSyncError?: (error: Error) => void; } -/** - * Represents a .gitignore file loaded from a specific directory. - * Rules in a .gitignore are relative to that directory, mirroring - * how git applies .gitignore files at every level. - */ -interface ScopedIgnore { - dir: string; - ig: Ignore; -} - -/** - * Load .gitignore files from projectRoot upward through parent - * directories. Returns a list ordered from root to projectRoot - * so nested rules (closest to the project) are checked first. - */ -function loadGitignoreChain(projectRoot: string): ScopedIgnore[] { - const matchers: ScopedIgnore[] = []; - let dir = projectRoot; - - // Determine the filesystem root (e.g. '/' on Linux) - const root = path.parse(dir).root; - - while (dir !== root) { - const giPath = path.join(dir, '.gitignore'); - try { - if (fs.existsSync(giPath)) { - matchers.unshift({ - dir, - ig: ignore().add(fs.readFileSync(giPath, 'utf-8')), - }); - } - } catch { - // Unreadable .gitignore — treat as absent - } - dir = path.dirname(dir); - } - - return matchers; -} - /** * FileWatcher monitors a project directory for changes and triggers * debounced sync operations via a provided callback. 
* * Design goals: - * - Minimal resource usage (chokidar with .gitignore-aware filtering - * avoids registering inotify watches on excluded directories) + * - Minimal resource usage (chokidar filters excluded directories before + * registering an inotify watch — see module docs / #276) * - Debounced to avoid thrashing on rapid saves * - Filters to supported source files by extension - * - Ignores .codegraph/ directory changes + * - Ignores .codegraph/ and .git/ regardless of .gitignore */ export class FileWatcher { private watcher: FSWatcher | null = null; @@ -98,7 +62,10 @@ export class FileWatcher { private hasChanges = false; private syncing = false; private stopped = false; - private gitignoreMatchers: ScopedIgnore[] = []; + // The shared ignore matcher (built-in defaults + project .gitignore), built + // once at start(). Same source of truth the indexer uses, so watcher scope + // can never diverge from index scope. + private ignoreMatcher: Ignore | null = null; private readonly projectRoot: string; private readonly debounceMs: number; @@ -136,84 +103,36 @@ export class FileWatcher { return false; } - // Load .gitignore rules from project root upward. - // These drive chokidar's `ignored` callback so we never register - // inotify watches on excluded directories (like node_modules/, .git/, - // dist/, .next/, etc.), avoiding kernel watch-budget exhaustion (#276). - this.gitignoreMatchers = loadGitignoreChain(this.projectRoot); + // Reuse the indexer's ignore set so the watcher and indexer agree on scope. + // chokidar only registers an inotify watch on directories that pass this + // filter — that's the #276 fix. + this.ignoreMatcher = buildDefaultIgnore(this.projectRoot); try { this.watcher = chokidar.watch(this.projectRoot, { - // Core fix for #276: filter directories BEFORE they are watched. - // chokidar calls this for every file and directory it encounters, - // and only registers an underlying fs.watch on those that pass. 
- // This drops per-instance inotify watch count from hundreds of - // thousands (on a monorepo) to hundreds — only the directories - // that actually contain tracked source code. - ignored: (testPath: string) => { - const rel = normalizePath(path.relative(this.projectRoot, testPath)); - - // Always ignore .codegraph/ (our own DB writes) and .git/ - if ( - rel === '.codegraph' || - rel.startsWith('.codegraph/') || - rel === '.git' || - rel.startsWith('.git/') - ) { - return true; - } - - // Check .gitignore rules - for (const { dir, ig } of this.gitignoreMatchers) { - let matcherRel = normalizePath(path.relative(dir, testPath)); - if (!matcherRel || matcherRel.startsWith('..')) continue; - - // For directory-only .gitignore rules (e.g. "build/"), - // append a trailing slash so the ignore package matches them. - try { - const stat = fs.statSync(testPath); - if (stat.isDirectory()) matcherRel += '/'; - } catch { - // If we can't stat, assume it's a file — don't append '/' - } - - if (ig.ignores(matcherRel)) return true; - } - - return false; - }, + // chokidar calls this for every path it encounters and only watches + // those that pass — so excluded trees (node_modules/, dist/, .git/, …) + // never get an inotify watch in the first place. + ignored: (testPath: string, stats?: Stats) => this.shouldIgnore(testPath, stats), }); - // Wire up the file-change handler. chokidar emits 'all' for every - // event type; we only care about files that were actually changed. + // chokidar emits 'all' for every event type; we only sync source files. this.watcher.on('all', (_event: string, filePath: string) => { if (this.stopped) return; const normalized = normalizePath(path.relative(this.projectRoot, filePath)); - // Defense in depth: filter again even though `ignored` should - // have prevented watches on these directories. Events can still - // arrive during watcher setup or from symlink traversal. 
- if ( - normalized === '.codegraph' || - normalized.startsWith('.codegraph/') || - normalized === '.git' || - normalized.startsWith('.git/') - ) { - return; - } - - // Only sync changes to files we can actually parse. - if (!isSourceFile(normalized)) { - return; - } + // Defense in depth: `ignored` should already keep these out, but events + // can still arrive during setup or via symlink traversal. + if (this.isAlwaysIgnored(normalized)) return; + if (!isSourceFile(normalized)) return; logDebug('File change detected', { file: normalized }); this.hasChanges = true; this.scheduleSync(); }); - // Handle watcher errors gracefully + // Handle watcher errors gracefully — don't crash, the user can restart. this.watcher.on('error', (err: unknown) => { logWarn('File watcher error', { error: String(err) }); }); @@ -221,12 +140,37 @@ export class FileWatcher { logDebug('File watcher started', { projectRoot: this.projectRoot, debounceMs: this.debounceMs }); return true; } catch (err) { - // Watcher setup failed (e.g., permission denied, missing directory) + // Watcher setup failed (e.g., permission denied, missing directory). logWarn('Could not start file watcher', { error: String(err) }); return false; } } + /** Our own dirs are always ignored, regardless of .gitignore. */ + private isAlwaysIgnored(rel: string): boolean { + return ( + rel === '.codegraph' || rel.startsWith('.codegraph/') || + rel === '.git' || rel.startsWith('.git/') + ); + } + + /** + * chokidar `ignored` predicate — true for any path that should NOT be watched. + * Uses chokidar's provided `stats` to decide directory-vs-file so a dir-only + * rule like `build/` matches, without an extra `statSync` per path. + */ + private shouldIgnore(testPath: string, stats?: Stats): boolean { + const rel = normalizePath(path.relative(this.projectRoot, testPath)); + if (!rel || rel === '.' 
|| rel.startsWith('..')) return false; // root / outside + if (this.isAlwaysIgnored(rel)) return true; + if (!this.ignoreMatcher) return false; + if (stats) { + return this.ignoreMatcher.ignores(stats.isDirectory() ? rel + '/' : rel); + } + // Stats unknown: test both forms so a directory match isn't missed. + return this.ignoreMatcher.ignores(rel) || this.ignoreMatcher.ignores(rel + '/'); + } + /** * Stop watching for file changes. */ @@ -244,7 +188,7 @@ export class FileWatcher { } this.hasChanges = false; - this.gitignoreMatchers = []; + this.ignoreMatcher = null; logDebug('File watcher stopped'); }