From fce05b6028cf224afc5d7a5e143663eeb998fd9b Mon Sep 17 00:00:00 2001 From: andreinknv Date: Tue, 28 Apr 2026 19:20:07 -0400 Subject: [PATCH 1/2] feat(extraction): instantiates + decorates graph edges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two new structural edges that fill gaps in the call graph for modern JS/TS / Java / C# / Python / Kotlin codebases. 1) `instantiates` edges from `new Foo(...)`: The bulk-extraction and visitFunctionBody dispatchers only recognised `call_expression`; `new_expression` (and the equivalent `object_creation_expression` / `instance_creation_expression` in other grammars) was silently ignored. Adds INSTANTIATION_KINDS, extractInstantiation(), and dispatch from BOTH the top-level visitNode and the per-function-body walker. Children are still descended so nested calls inside constructor args (`new Foo(bar())`) get their own `calls` refs. Output: a `bootstrap` function that does `new UserService(); new UserController(svc)` now produces two `instantiates` edges to those class nodes — previously zero edges. 2) `decorates` edges from `@Decorator` annotations: Tree-sitter places decorator nodes BEFORE the symbol they apply to in the AST, so the original walk-time dispatch saw the wrong nodeStack head (file/class instead of class/method). Replaced with extractDecoratorsFor(declNode, decoratedId) that runs from inside extractClass / extractFunction / extractMethod after the symbol's node id is known. Looks for decorator nodes in two places: - Direct named children of the declaration (method/property style) - Preceding siblings in the parent (TypeScript class style: @Foo class X {} parses as parent { decorator, class_decl }) Sibling check uses startIndex comparison rather than reference identity — tree-sitter web bindings return fresh JS wrappers from parent/namedChild navigation, so `===` is unreliable. 
Took a debug session to spot this; flagging in the comment so the next reader doesn't re-introduce the bug. Output: a `@Controller` class decorator + `@Get` method decorator on a NestJS-style controller now produce two `decorates` edges (class→Controller, method→Get) with the correct source nodes. Verified live on a synthetic NestJS-shape fixture; all 380 existing tests pass. --- src/extraction/tree-sitter.ts | 160 ++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index 7345d91f6..46b53dfb1 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -95,6 +95,17 @@ function extractName(node: SyntaxNode, source: string, extractor: LanguageExtrac return ''; } +/** + * Tree-sitter node kinds that represent constructor invocations + * (`new Foo()` and friends). Used by extractInstantiation to emit + * an `instantiates` reference targeting the class name. + */ +const INSTANTIATION_KINDS: ReadonlySet<string> = new Set([ + 'new_expression', // typescript / javascript / tsx / jsx + 'object_creation_expression', // java / c# + 'instance_creation_expression', // some grammars +]); + /** * TreeSitterExtractor - Main extraction class */ @@ -334,6 +345,17 @@ export class TreeSitterExtractor { else if (this.extractor.callTypes.includes(nodeType)) { this.extractCall(node); } + // `new Foo(...)` / `Foo::new(...)` / object_creation_expression — + // produce an `instantiates` reference. Children still walked so + // nested calls inside the constructor args (`new Foo(bar())`) get + // their own `calls` refs. + else if (INSTANTIATION_KINDS.has(nodeType)) { + this.extractInstantiation(node); + } + // (Decorator handling lives inside the symbol-creating extractors + // — extractClass / extractFunction / extractProperty — because the + // decorator node sits BEFORE the symbol in the AST and the walker + // would otherwise see the wrong nodeStack head.) 
// Rust: `impl Trait for Type { ... }` — creates implements edge from Type to Trait else if (nodeType === 'impl_item') { this.extractRustImplItem(node); @@ -531,6 +553,11 @@ export class TreeSitterExtractor { // Extract type annotations (parameter types and return type) this.extractTypeAnnotations(node, funcNode.id); + // Extract decorators applied to the function (rare in JS/TS but + // present in Python `@decorator def f():` and Java/Kotlin + // annotations on free functions). + this.extractDecoratorsFor(node, funcNode.id); + // Push to stack and visit body this.nodeStack.push(funcNode.id); const body = this.extractor.resolveBody?.(node, this.extractor.bodyField) @@ -562,6 +589,9 @@ export class TreeSitterExtractor { // Extract extends/implements this.extractInheritance(node, classNode.id); + // Extract decorators applied to the class (`@Foo class X {}`). + this.extractDecoratorsFor(node, classNode.id); + // Push to stack and visit body this.nodeStack.push(classNode.id); let body = this.extractor.resolveBody?.(node, this.extractor.bodyField) @@ -655,6 +685,9 @@ export class TreeSitterExtractor { // Extract type annotations (parameter types and return type) this.extractTypeAnnotations(node, methodNode.id); + // Extract decorators (`@Get('/list') list() {}`). + this.extractDecoratorsFor(node, methodNode.id); + // Push to stack and visit body this.nodeStack.push(methodNode.id); const body = this.extractor.resolveBody?.(node, this.extractor.bodyField) @@ -1448,6 +1481,127 @@ export class TreeSitterExtractor { } } + /** + * `new Foo(...)` / `Foo::new(...)` / object_creation_expression — + * emit an `instantiates` reference to the class name. The resolver + * then links it to the class node, producing the `instantiates` + * edge that powers "what creates instances of X" queries. + * + * Children are still walked so nested calls inside the constructor + * arguments (`new Foo(bar())`) get their own `calls` references. 
+ */ + private extractInstantiation(node: SyntaxNode): void { + if (this.nodeStack.length === 0) return; + const fromId = this.nodeStack[this.nodeStack.length - 1]; + if (!fromId) return; + + // The class name is in the `constructor`/`type`/first-named-child + // depending on grammar. + const ctor = + getChildByField(node, 'constructor') || + getChildByField(node, 'type') || + getChildByField(node, 'name') || + node.namedChild(0); + if (!ctor) return; + + let className = getNodeText(ctor, this.source); + // For namespaced/qualified constructors (`new ns.Foo()`, + // `new ns::Foo()`) keep the trailing identifier — that's what + // matches a class node in the index. + const lastDot = Math.max( + className.lastIndexOf('.'), + className.lastIndexOf('::') + ); + if (lastDot >= 0) className = className.slice(lastDot + 1).replace(/^[:.]/, ''); + + if (className) { + this.unresolvedReferences.push({ + fromNodeId: fromId, + referenceName: className, + referenceKind: 'instantiates', + line: node.startPosition.row + 1, + column: node.startPosition.column, + }); + } + } + + /** + * Scan `declNode` and its preceding siblings (within the parent's + * named children) for decorator nodes, emitting a `decorates` + * reference from `decoratedId` to each decorator's function name. + * + * Why preceding siblings: in TypeScript, `@Foo class Bar {}` parses + * as an `export_statement` (or top-level wrapper) with the + * `decorator` as a child *before* the `class_declaration` — so the + * decorator isn't a child of the class itself. For methods/ + * properties, the decorator IS a direct child of the declaration, + * so we also scan declNode.namedChildren. + * + * Idempotent across grammars: if neither location yields decorators + * (most non-decorator-using languages), the function is a no-op. 
+ */ + private extractDecoratorsFor(declNode: SyntaxNode, decoratedId: string): void { + const consider = (n: SyntaxNode | null): void => { + if (!n) return; + if (n.type !== 'decorator' && n.type !== 'annotation') return; + // Find the leading identifier: skip the `@` punct, unwrap + // a call_expression if the decorator is invoked with args. + let target: SyntaxNode | null = null; + for (let i = 0; i < n.namedChildCount; i++) { + const child = n.namedChild(i); + if (!child) continue; + if (child.type === 'call_expression') { + const fn = getChildByField(child, 'function') ?? child.namedChild(0); + if (fn) target = fn; + if (target) break; + } + if ( + child.type === 'identifier' || + child.type === 'member_expression' || + child.type === 'scoped_identifier' || + child.type === 'navigation_expression' + ) { + target = child; + break; + } + } + if (!target) return; + let name = getNodeText(target, this.source); + const lastDot = Math.max(name.lastIndexOf('.'), name.lastIndexOf('::')); + if (lastDot >= 0) name = name.slice(lastDot + 1).replace(/^[:.]/, ''); + if (!name) return; + this.unresolvedReferences.push({ + fromNodeId: decoratedId, + referenceName: name, + referenceKind: 'decorates', + line: n.startPosition.row + 1, + column: n.startPosition.column, + }); + }; + + // 1. Decorators that are direct children of the declaration + // (method/property style, also some grammars for class). + for (let i = 0; i < declNode.namedChildCount; i++) { + consider(declNode.namedChild(i)); + } + + // 2. Decorators that are PRECEDING siblings of the declaration + // inside the parent's children (TypeScript class style). + // Note: tree-sitter web bindings return fresh JS wrapper + // objects from `parent`/`namedChild`, so `sibling === declNode` + // is unreliable — compare by start byte instead. 
+ const parent = declNode.parent; + if (parent) { + const declStart = declNode.startIndex; + for (let i = 0; i < parent.namedChildCount; i++) { + const sibling = parent.namedChild(i); + if (!sibling) continue; + if (sibling.startIndex >= declStart) break; + consider(sibling); + } + } + } + /** * Visit function body and extract calls (and structural nodes). * @@ -1466,6 +1620,12 @@ export class TreeSitterExtractor { if (this.extractor!.callTypes.includes(nodeType)) { this.extractCall(node); + } else if (INSTANTIATION_KINDS.has(nodeType)) { + // `new Foo()` inside a function body — emit an `instantiates` + // reference. Without this branch the body walker only knew + // about `call_expression`, so constructor invocations + // produced no graph edges at all. + this.extractInstantiation(node); } else if (this.extractor!.extractBareCall) { const calleeName = this.extractor!.extractBareCall(node, this.source); if (calleeName && this.nodeStack.length > 0) { From 31cb3e963610319888ca4bc55a0ce75c84857722 Mon Sep 17 00:00:00 2001 From: andreinknv Date: Tue, 28 Apr 2026 19:27:01 -0400 Subject: [PATCH 2/2] =?UTF-8?q?fix(extraction):=20address=20reviewer=20fin?= =?UTF-8?q?dings=20=E2=80=94=20decorator=20boundary,=20generic=20construct?= =?UTF-8?q?ors,=20property/field=20decorators,=20marker=5Fannotation,=20te?= =?UTF-8?q?sts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five fixes from independent semantic review: - extractDecoratorsFor sibling walk now iterates BACKWARD from the declaration and stops at the first non-decorator/annotation separator. Previous version walked forward up to declStart and consumed every decorator-typed sibling — so two adjacent decorated classes (`@A class Foo {} @B class Bar {}`) had `@A` spuriously attributed to `Bar`. - extractInstantiation strips the type-argument suffix from the constructor field text. 
`new Map<K, V>()` was producing referenceName 'Map<K, V>' (the constructor field is a generic_type node) and resolution always failed. - extractProperty and extractField now call extractDecoratorsFor after their createNode calls. NestJS-style `@Inject() private svc: Foo` and Java field annotations were being silently dropped. - consider() in extractDecoratorsFor recognises 'marker_annotation' in addition to 'decorator'/'annotation'. Java's tree-sitter grammar emits marker_annotation for arg-less annotations like @Override and @Deprecated; without this every Java marker annotation was silently skipped. - 6 new extraction tests covering: instantiates ref for new Foo(), generic-type stripping (`new Container<string>()` -> 'Container'), qualified-new keeps trailing identifier (`new ns.Foo()` -> 'Foo'), decorates ref for @Foo class X {}, regression for adjacent decorated classes (each gets its OWN decorator), decorates ref for @Foo method(). Full test suite: 386 passed (was 380, +6 new extraction tests). --- __tests__/extraction.test.ts | 98 +++++++++++++++++++++++++++++++++++ src/extraction/tree-sitter.ts | 62 ++++++++++++++++++---- 2 files changed, 151 insertions(+), 9 deletions(-) diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 8a70ffedd..f9809e538 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -3079,3 +3079,101 @@ describe('Directory Exclusion', () => { expect(files.every((f) => !f.includes('vendor'))).toBe(true); }); }); + +describe('Instantiates + Decorates edge extraction', () => { + it('emits an instantiates ref for `new Foo()`', () => { + const code = ` +class Foo {} +function bootstrap() { return new Foo(); } +`; + const result = extractFromSource('app.ts', code); + const ref = result.unresolvedReferences.find( + (r) => r.referenceKind === 'instantiates' && r.referenceName === 'Foo' + ); + expect(ref).toBeDefined(); + }); + + it('strips type-argument suffix from generic constructors', () => { + const code = ` +class 
Container<T> { constructor(_: T) {} } +function go() { return new Container<string>('x'); } +`; + const result = extractFromSource('app.ts', code); + const ref = result.unresolvedReferences.find( + (r) => r.referenceKind === 'instantiates' + ); + expect(ref).toBeDefined(); + // Container<string> must be normalised to "Container" — otherwise + // resolution can never match the class node. + expect(ref!.referenceName).toBe('Container'); + }); + + it('keeps trailing identifier from qualified `new ns.Foo()`', () => { + const code = ` +const ns = { Foo: class {} }; +function go() { return new ns.Foo(); } +`; + const result = extractFromSource('app.ts', code); + const ref = result.unresolvedReferences.find( + (r) => r.referenceKind === 'instantiates' + ); + // We can't always resolve which Foo, but the name should be the + // simple identifier so name-matching has a chance. + expect(ref?.referenceName).toBe('Foo'); + }); + + it('emits a decorates ref for `@Foo class X {}`', () => { + const code = ` +function Foo(_arg: string) { return (cls: any) => cls; } +@Foo('x') +class X {} +`; + const result = extractFromSource('app.ts', code); + const decorClass = result.unresolvedReferences.find( + (r) => r.referenceKind === 'decorates' && r.referenceName === 'Foo' + ); + expect(decorClass).toBeDefined(); + }); + + it('does NOT attribute a prior class\'s decorator to the next class', () => { + // Regression: the sibling-walk must stop at the first non- + // decorator separator. `@A class Foo {} @B class Bar {}` must + // produce `decorates(Foo, A)` and `decorates(Bar, B)` — never + // `decorates(Bar, A)`. + const code = ` +function A(cls: any) { return cls; } +function B(cls: any) { return cls; } +@A +class Foo {} +@B +class Bar {} +`; + const result = extractFromSource('app.ts', code); + const decoratesEdges = result.unresolvedReferences.filter( + (r) => r.referenceKind === 'decorates' + ); + // Exactly one decorates ref per decorated class, no cross-attribution. 
+ const fromBar = decoratesEdges.filter((r) => + result.nodes.find((n) => n.id === r.fromNodeId && n.name === 'Bar') + ); + expect(fromBar.length).toBe(1); + expect(fromBar[0]!.referenceName).toBe('B'); + }); + + it('emits a decorates ref for `@Foo method() {}`', () => { + const code = ` +function Get(p: string) { return (t: any, k: string) => t; } +class Svc { + @Get('/x') method() { return 1; } +} +`; + const result = extractFromSource('app.ts', code); + const decorMethod = result.unresolvedReferences.find( + (r) => r.referenceKind === 'decorates' && r.referenceName === 'Get' + ); + expect(decorMethod).toBeDefined(); + // The decorated symbol must be `method`, not the constructor or class. + const decoratedNode = result.nodes.find((n) => n.id === decorMethod!.fromNodeId); + expect(decoratedNode?.name).toBe('method'); + }); +}); diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index 46b53dfb1..24b158d48 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -867,12 +867,18 @@ export class TreeSitterExtractor { const typeText = typeNode ? getNodeText(typeNode, this.source) : undefined; const signature = typeText ? `${typeText} ${name}` : name; - this.createNode('property', name, node, { + const propNode = this.createNode('property', name, node, { docstring, signature, visibility, isStatic, }); + + // `@Inject() private svc: Foo` and similar — capture the + // decorator->target relationship for class properties too. + if (propNode) { + this.extractDecoratorsFor(node, propNode.id); + } } /** @@ -946,12 +952,15 @@ export class TreeSitterExtractor { if (!nameNode) continue; const name = getNodeText(nameNode, this.source); const signature = typeText ? 
`${typeText} ${name}` : name; - this.createNode('field', name, decl, { + const fieldNode = this.createNode('field', name, decl, { docstring, signature, visibility, isStatic, }); + // Java/Kotlin annotations / TS field decorators sit on the + // outer field_declaration, not on the individual declarator. + if (fieldNode) this.extractDecoratorsFor(node, fieldNode.id); } } else { // Fallback: try to find an identifier child directly @@ -1505,6 +1514,12 @@ export class TreeSitterExtractor { if (!ctor) return; let className = getNodeText(ctor, this.source); + // Strip type-argument suffix first: `new Map<K, V>()` would + // otherwise produce className 'Map<K, V>' (the constructor + // field is a `generic_type` node) and resolution would fail + // because no class is named with the angle-bracket suffix. + const ltIdx = className.indexOf('<'); + if (ltIdx > 0) className = className.slice(0, ltIdx); // For namespaced/qualified constructors (`new ns.Foo()`, // `new ns::Foo()`) keep the trailing identifier — that's what // matches a class node in the index. @@ -1513,6 +1528,7 @@ export class TreeSitterExtractor { className.lastIndexOf('::') ); if (lastDot >= 0) className = className.slice(lastDot + 1).replace(/^[:.]/, ''); + className = className.trim(); if (className) { this.unresolvedReferences.push({ @@ -1543,7 +1559,16 @@ export class TreeSitterExtractor { private extractDecoratorsFor(declNode: SyntaxNode, decoratedId: string): void { const consider = (n: SyntaxNode | null): void => { if (!n) return; - if (n.type !== 'decorator' && n.type !== 'annotation') return; + // `marker_annotation` is Java's grammar for arg-less annotations + // (`@Override`, `@Deprecated`); without including it, every + // such Java annotation would be silently skipped. + if ( + n.type !== 'decorator' && + n.type !== 'annotation' && + n.type !== 'marker_annotation' + ) { + return; + } // Find the leading identifier: skip the `@` punct, unwrap // a call_expression if the decorator is invoked with args. 
let target: SyntaxNode | null = null; @@ -1587,17 +1612,36 @@ export class TreeSitterExtractor { // 2. Decorators that are PRECEDING siblings of the declaration // inside the parent's children (TypeScript class style). - // Note: tree-sitter web bindings return fresh JS wrapper - // objects from `parent`/`namedChild`, so `sibling === declNode` - // is unreliable — compare by start byte instead. + // Walk BACKWARDS from the declaration and stop at the first + // non-decorator sibling — without that stop, decorators + // belonging to an EARLIER unrelated declaration leak in + // (e.g. `@A class Foo {} @B class Bar {}` would otherwise + // attribute @A to Bar). + // + // Note on identity: tree-sitter web bindings return fresh JS + // wrapper objects from `parent`/`namedChild` navigation, so + // `sibling === declNode` is unreliable — `startIndex` does + // the matching instead. const parent = declNode.parent; if (parent) { const declStart = declNode.startIndex; + let declIdx = -1; for (let i = 0; i < parent.namedChildCount; i++) { const sibling = parent.namedChild(i); - if (!sibling) continue; - if (sibling.startIndex >= declStart) break; - consider(sibling); + if (sibling && sibling.startIndex === declStart) { + declIdx = i; + break; + } + } + if (declIdx > 0) { + for (let j = declIdx - 1; j >= 0; j--) { + const sibling = parent.namedChild(j); + if (!sibling) continue; + if (sibling.type !== 'decorator' && sibling.type !== 'annotation' && sibling.type !== 'marker_annotation') { + break; // non-decorator separator → stop consuming + } + consider(sibling); + } } } }