Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions __tests__/extraction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3079,3 +3079,101 @@ describe('Directory Exclusion', () => {
expect(files.every((f) => !f.includes('vendor'))).toBe(true);
});
});

// Covers the `instantiates` and `decorates` unresolved references that
// extraction emits for constructor calls and decorators.
//
// NOTE: the fixture template literals below are deliberately left
// unindented; their contents are reproduced exactly.
describe('Instantiates + Decorates edge extraction', () => {
  it('emits an instantiates ref for `new Foo()`', () => {
    const code = `
class Foo {}
function bootstrap() { return new Foo(); }
`;
    const result = extractFromSource('app.ts', code);
    const ref = result.unresolvedReferences.find(
      (r) => r.referenceKind === 'instantiates' && r.referenceName === 'Foo'
    );
    expect(ref).toBeDefined();
  });

  it('strips type-argument suffix from generic constructors', () => {
    const code = `
class Container<T> { constructor(_: T) {} }
function go() { return new Container<string>('x'); }
`;
    const result = extractFromSource('app.ts', code);
    const ref = result.unresolvedReferences.find(
      (r) => r.referenceKind === 'instantiates'
    );
    expect(ref).toBeDefined();
    // Container<string> must be normalised to "Container" — otherwise
    // resolution can never match the class node.
    expect(ref!.referenceName).toBe('Container');
  });

  it('keeps trailing identifier from qualified `new ns.Foo()`', () => {
    const code = `
const ns = { Foo: class {} };
function go() { return new ns.Foo(); }
`;
    const result = extractFromSource('app.ts', code);
    const ref = result.unresolvedReferences.find(
      (r) => r.referenceKind === 'instantiates'
    );
    // We can't always resolve which Foo, but the name should be the
    // simple identifier so name-matching has a chance.
    expect(ref?.referenceName).toBe('Foo');
  });

  it('emits a decorates ref for `@Foo class X {}`', () => {
    const code = `
function Foo(_arg: string) { return (cls: any) => cls; }
@Foo('x')
class X {}
`;
    const result = extractFromSource('app.ts', code);
    const decorClass = result.unresolvedReferences.find(
      (r) => r.referenceKind === 'decorates' && r.referenceName === 'Foo'
    );
    expect(decorClass).toBeDefined();
  });

  it('does NOT attribute a prior class\'s decorator to the next class', () => {
    // Regression: the sibling-walk must stop at the first non-
    // decorator separator. `@A class Foo {} @B class Bar {}` must
    // produce `decorates(Foo, A)` and `decorates(Bar, B)` — never
    // `decorates(Bar, A)`.
    const code = `
function A(cls: any) { return cls; }
function B(cls: any) { return cls; }
@A
class Foo {}
@B
class Bar {}
`;
    const result = extractFromSource('app.ts', code);
    const decoratesEdges = result.unresolvedReferences.filter(
      (r) => r.referenceKind === 'decorates'
    );
    // Names of the decorators attributed to the class called `className`.
    const decoratorsOf = (className: string) =>
      decoratesEdges
        .filter((r) =>
          result.nodes.some((n) => n.id === r.fromNodeId && n.name === className)
        )
        .map((r) => r.referenceName);
    // Exactly one decorates ref per decorated class, no cross-attribution.
    // Both classes are checked so a leak in either direction fails.
    expect(decoratorsOf('Foo')).toEqual(['A']);
    expect(decoratorsOf('Bar')).toEqual(['B']);
  });

  it('emits a decorates ref for `@Foo method() {}`', () => {
    const code = `
function Get(p: string) { return (t: any, k: string) => t; }
class Svc {
@Get('/x') method() { return 1; }
}
`;
    const result = extractFromSource('app.ts', code);
    const decorMethod = result.unresolvedReferences.find(
      (r) => r.referenceKind === 'decorates' && r.referenceName === 'Get'
    );
    expect(decorMethod).toBeDefined();
    // The decorated symbol must be `method`, not the constructor or class.
    const decoratedNode = result.nodes.find((n) => n.id === decorMethod!.fromNodeId);
    expect(decoratedNode?.name).toBe('method');
  });
});
208 changes: 206 additions & 2 deletions src/extraction/tree-sitter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,17 @@ function extractName(node: SyntaxNode, source: string, extractor: LanguageExtrac
return '<anonymous>';
}

/**
 * Syntax-node type names that denote a constructor invocation
 * (`new Foo()` and equivalents in other grammars). A node whose type
 * is in this set is handed to extractInstantiation, which records an
 * `instantiates` reference to the class being constructed.
 */
const INSTANTIATION_KINDS: ReadonlySet<string> = new Set<string>([
  // typescript / javascript / tsx / jsx
  'new_expression',
  // java / c#
  'object_creation_expression',
  // some grammars
  'instance_creation_expression',
]);

/**
* TreeSitterExtractor - Main extraction class
*/
Expand Down Expand Up @@ -334,6 +345,17 @@ export class TreeSitterExtractor {
else if (this.extractor.callTypes.includes(nodeType)) {
this.extractCall(node);
}
// `new Foo(...)` and the other constructor-invocation node kinds in
// INSTANTIATION_KINDS (e.g. `object_creation_expression`) produce an
// `instantiates` reference. Children are still walked so nested calls
// inside the constructor args (`new Foo(bar())`) get their own `calls`
// refs.
else if (INSTANTIATION_KINDS.has(nodeType)) {
this.extractInstantiation(node);
}
// (Decorator handling lives inside the symbol-creating extractors
// — extractClass / extractFunction / extractProperty — because the
// decorator node sits BEFORE the symbol in the AST and the walker
// would otherwise see the wrong nodeStack head.)
// Rust: `impl Trait for Type { ... }` — creates implements edge from Type to Trait
else if (nodeType === 'impl_item') {
this.extractRustImplItem(node);
Expand Down Expand Up @@ -531,6 +553,11 @@ export class TreeSitterExtractor {
// Extract type annotations (parameter types and return type)
this.extractTypeAnnotations(node, funcNode.id);

// Extract decorators applied to the function (rare in JS/TS but
// present in Python `@decorator def f():` and Java/Kotlin
// annotations on free functions).
this.extractDecoratorsFor(node, funcNode.id);

// Push to stack and visit body
this.nodeStack.push(funcNode.id);
const body = this.extractor.resolveBody?.(node, this.extractor.bodyField)
Expand Down Expand Up @@ -562,6 +589,9 @@ export class TreeSitterExtractor {
// Extract extends/implements
this.extractInheritance(node, classNode.id);

// Extract decorators applied to the class (`@Foo class X {}`).
this.extractDecoratorsFor(node, classNode.id);

// Push to stack and visit body
this.nodeStack.push(classNode.id);
let body = this.extractor.resolveBody?.(node, this.extractor.bodyField)
Expand Down Expand Up @@ -655,6 +685,9 @@ export class TreeSitterExtractor {
// Extract type annotations (parameter types and return type)
this.extractTypeAnnotations(node, methodNode.id);

// Extract decorators (`@Get('/list') list() {}`).
this.extractDecoratorsFor(node, methodNode.id);

// Push to stack and visit body
this.nodeStack.push(methodNode.id);
const body = this.extractor.resolveBody?.(node, this.extractor.bodyField)
Expand Down Expand Up @@ -834,12 +867,18 @@ export class TreeSitterExtractor {
const typeText = typeNode ? getNodeText(typeNode, this.source) : undefined;
const signature = typeText ? `${typeText} ${name}` : name;

this.createNode('property', name, node, {
const propNode = this.createNode('property', name, node, {
docstring,
signature,
visibility,
isStatic,
});

// `@Inject() private svc: Foo` and similar — capture the
// decorator->target relationship for class properties too.
if (propNode) {
this.extractDecoratorsFor(node, propNode.id);
}
}

/**
Expand Down Expand Up @@ -913,12 +952,15 @@ export class TreeSitterExtractor {
if (!nameNode) continue;
const name = getNodeText(nameNode, this.source);
const signature = typeText ? `${typeText} ${name}` : name;
this.createNode('field', name, decl, {
const fieldNode = this.createNode('field', name, decl, {
docstring,
signature,
visibility,
isStatic,
});
// Java/Kotlin annotations / TS field decorators sit on the
// outer field_declaration, not on the individual declarator.
if (fieldNode) this.extractDecoratorsFor(node, fieldNode.id);
}
} else {
// Fallback: try to find an identifier child directly
Expand Down Expand Up @@ -1448,6 +1490,162 @@ export class TreeSitterExtractor {
}
}

/**
 * Record an `instantiates` reference for a constructor-invocation node
 * (`new Foo(...)`, `object_creation_expression`, ...).
 *
 * The reference originates from the symbol currently on top of
 * `nodeStack` and targets the *simple* class name:
 *   - a type-argument suffix is stripped (`Map<K, V>` -> `Map`);
 *   - for qualified names only the trailing identifier is kept
 *     (`ns.Foo` / `ns::Foo` -> `Foo`).
 *
 * Nothing is recorded when there is no enclosing symbol, when no
 * constructor child can be located, or when the constructor is an
 * arbitrary expression rather than a name (`new (getCtor())()`), since
 * such text could never match a class node.
 *
 * This method only records the reference; it does not descend into the
 * node's children.
 *
 * @param node - the constructor-invocation syntax node.
 */
private extractInstantiation(node: SyntaxNode): void {
  // An empty stack yields `undefined` here, so one guard covers both
  // "no enclosing symbol" and "falsy id".
  const fromId = this.nodeStack[this.nodeStack.length - 1];
  if (!fromId) return;

  // The class name is in the `constructor`/`type`/`name` field or, failing
  // those, the first named child — depending on grammar.
  const ctor =
    getChildByField(node, 'constructor') ||
    getChildByField(node, 'type') ||
    getChildByField(node, 'name') ||
    node.namedChild(0);
  if (!ctor) return;

  let className = getNodeText(ctor, this.source);

  // Strip the type-argument suffix first: `new Map<K, V>()` would
  // otherwise produce 'Map<K, V>' and resolution would fail because no
  // class is named with the angle-bracket suffix. Doing this before the
  // qualifier split also keeps dots inside type arguments out of play.
  const ltIdx = className.indexOf('<');
  if (ltIdx > 0) className = className.slice(0, ltIdx);

  // For namespaced/qualified constructors keep only the trailing
  // identifier — that's what matches a class node in the index.
  const dotIdx = className.lastIndexOf('.');
  const scopeIdx = className.lastIndexOf('::');
  const nameStart = Math.max(
    dotIdx >= 0 ? dotIdx + 1 : 0,
    scopeIdx >= 0 ? scopeIdx + 2 : 0
  );
  className = className.slice(nameStart).trim();
  if (!className) return;

  // Brackets, quotes or embedded whitespace mean the "constructor" was an
  // expression (`new (factory())()`, `new registry['x']()`), not a name.
  // Emitting that text would only create a permanently unresolvable ref.
  if (/[\s()[\]{}'"`]/.test(className)) return;

  this.unresolvedReferences.push({
    fromNodeId: fromId,
    referenceName: className,
    referenceKind: 'instantiates',
    line: node.startPosition.row + 1,
    column: node.startPosition.column,
  });
}

/**
 * Emit a `decorates` reference from `decoratedId` to the name of every
 * decorator / annotation attached to `declNode`.
 *
 * Grammars disagree on where a decorator sits relative to the symbol it
 * decorates, so three locations are inspected:
 *   1. direct named children of `declNode`;
 *   2. named children of a `modifiers` child of `declNode`
 *      (tree-sitter-java nests annotations inside `modifiers`);
 *   3. the run of decorator siblings immediately preceding `declNode`
 *      within its parent's named children.
 *
 * The referenced name is the decorator's callee with any qualifier
 * removed (`@ns.Get('/x')` -> `Get`). If no decorator is found in any
 * location the method does nothing.
 *
 * @param declNode - syntax node of the decorated declaration.
 * @param decoratedId - id of the graph node created for that declaration;
 *   used as `fromNodeId` of each emitted reference.
 */
private extractDecoratorsFor(declNode: SyntaxNode, decoratedId: string): void {
  // `marker_annotation` is Java's grammar for arg-less annotations
  // (`@Override`, `@Deprecated`).
  const isDecorator = (n: SyntaxNode): boolean =>
    n.type === 'decorator' ||
    n.type === 'annotation' ||
    n.type === 'marker_annotation';

  const consider = (n: SyntaxNode | null): void => {
    if (!n || !isDecorator(n)) return;

    // Find the leading name: skip the `@` punct (unnamed, so never
    // visited here) and unwrap a call if the decorator is invoked with
    // arguments. `call` / `attribute` are the tree-sitter-python
    // counterparts of `call_expression` / `member_expression`.
    let target: SyntaxNode | null = null;
    for (let i = 0; i < n.namedChildCount; i++) {
      const child = n.namedChild(i);
      if (!child) continue;
      if (child.type === 'call_expression' || child.type === 'call') {
        target = getChildByField(child, 'function') ?? child.namedChild(0);
        if (target) break;
        continue;
      }
      if (
        child.type === 'identifier' ||
        child.type === 'member_expression' ||
        child.type === 'attribute' ||
        child.type === 'scoped_identifier' ||
        child.type === 'navigation_expression'
      ) {
        target = child;
        break;
      }
    }
    if (!target) return;

    // Keep only the trailing identifier of a qualified name.
    let name = getNodeText(target, this.source);
    const lastDot = Math.max(name.lastIndexOf('.'), name.lastIndexOf('::'));
    if (lastDot >= 0) name = name.slice(lastDot + 1).replace(/^[:.]/, '');
    if (!name) return;

    this.unresolvedReferences.push({
      fromNodeId: decoratedId,
      referenceName: name,
      referenceKind: 'decorates',
      line: n.startPosition.row + 1,
      column: n.startPosition.column,
    });
  };

  // 1 + 2. Decorators that are direct children of the declaration, or
  //        that sit one level down inside a `modifiers` wrapper.
  for (let i = 0; i < declNode.namedChildCount; i++) {
    const child = declNode.namedChild(i);
    if (child && child.type === 'modifiers') {
      for (let k = 0; k < child.namedChildCount; k++) {
        consider(child.namedChild(k));
      }
    } else {
      consider(child);
    }
  }

  // 3. Decorators that are PRECEDING siblings of the declaration inside
  //    the parent's children. Walk BACKWARDS from the declaration and
  //    stop at the first non-decorator sibling — without that stop,
  //    decorators belonging to an EARLIER unrelated declaration leak in
  //    (e.g. `@A class Foo {} @B class Bar {}` would otherwise attribute
  //    @A to Bar).
  //
  //    Note on identity: tree-sitter web bindings return fresh JS
  //    wrapper objects from `parent`/`namedChild` navigation, so
  //    `sibling === declNode` is unreliable — `startIndex` does the
  //    matching instead.
  const parent = declNode.parent;
  if (!parent) return;

  const declStart = declNode.startIndex;
  let declIdx = -1;
  for (let i = 0; i < parent.namedChildCount; i++) {
    const sibling = parent.namedChild(i);
    if (sibling && sibling.startIndex === declStart) {
      declIdx = i;
      break;
    }
  }

  for (let j = declIdx - 1; j >= 0; j--) {
    const sibling = parent.namedChild(j);
    if (!sibling) continue;
    if (!isDecorator(sibling)) break; // non-decorator separator → stop consuming
    consider(sibling);
  }
}

/**
* Visit function body and extract calls (and structural nodes).
*
Expand All @@ -1466,6 +1664,12 @@ export class TreeSitterExtractor {

if (this.extractor!.callTypes.includes(nodeType)) {
this.extractCall(node);
} else if (INSTANTIATION_KINDS.has(nodeType)) {
// `new Foo()` inside a function body — emit an `instantiates`
// reference. Without this branch the body walker only knew
// about `call_expression`, so constructor invocations
// produced no graph edges at all.
this.extractInstantiation(node);
} else if (this.extractor!.extractBareCall) {
const calleeName = this.extractor!.extractBareCall(node, this.source);
if (calleeName && this.nodeStack.length > 0) {
Expand Down