Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 45 additions & 6 deletions src/mcp.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1779,25 +1779,64 @@ fn handleSearch(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out:
// Over-fetch by `offset` (+1) so we can page into a stable window and
// detect whether more results exist beyond this page. BM25 ranking is
// deterministic per query, so the offset is a stable, stateless cursor.
const fetch_count = @min(offset_n + max_results + 1, 100000);
const fetched = (if (multiword)
const want_count = @min(offset_n + max_results + 1, 100000);
var fetch_count = want_count;
var fetched = (if (multiword)
explorer.searchContentRanked(query, alloc, fetch_count)
else
explorer.searchContent(query, alloc, fetch_count)) catch {
out.appendSlice(alloc, "error: search failed") catch {};
return;
};
// #560: path_glob filters AFTER ranking, so a window of global results
// can hold zero in-glob hits while deeper ranks match — the page must
// be filled from the glob-filtered sequence, not the global one.
// Escalate the fetch window until the in-glob set fills the page or
// the index is exhausted.
if (path_glob) |g| {
while (true) {
var in_glob: usize = 0;
for (fetched) |r| {
if (globMatch(g, r.path)) in_glob += 1;
}
const exhausted = fetched.len < fetch_count;
if (in_glob >= want_count or exhausted or fetch_count >= 100000) break;
fetch_count = @min(fetch_count * 4, 100000);
for (fetched) |r| {
alloc.free(r.line_text);
alloc.free(r.path);
}
alloc.free(fetched);
fetched = (if (multiword)
explorer.searchContentRanked(query, alloc, fetch_count)
else
explorer.searchContent(query, alloc, fetch_count)) catch {
out.appendSlice(alloc, "error: search failed") catch {};
return;
};
}
}
defer {
for (fetched) |r| {
alloc.free(r.line_text);
alloc.free(r.path);
}
alloc.free(fetched);
}
const page_lo = @min(offset_n, fetched.len);
const page_hi = @min(offset_n + max_results, fetched.len);
const results = fetched[page_lo..page_hi];
const has_more = fetched.len > page_hi;
// Page over the glob-filtered view so offset/max_results address
// in-glob results rather than global ranks.
var glob_view: std.ArrayList(explore_mod.SearchResult) = .empty;
defer glob_view.deinit(alloc);
if (path_glob) |g| {
for (fetched) |r| {
if (globMatch(g, r.path)) glob_view.append(alloc, r) catch {};
}
}
const page_src: []const explore_mod.SearchResult = if (path_glob != null) glob_view.items else fetched;
const page_lo = @min(offset_n, page_src.len);
const page_hi = @min(offset_n + max_results, page_src.len);
const results = page_src[page_lo..page_hi];
const has_more = page_src.len > page_hi;
if (json_fmt) {
writeSearchResultsJson(out, alloc, explorer, query, results, page_lo, has_more, paths_only, path_glob, compact);
return;
Expand Down
44 changes: 44 additions & 0 deletions src/test_search.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1666,3 +1666,47 @@ test "issue-546: searchContent rerank penalizes non-source tooling paths (bench/
try testing.expect(results.len >= 5);
try testing.expectEqualStrings("src/sample.zig", results[0].path);
}


test "issue-560: path_glob page must not be starved by higher-ranked out-of-glob files" {
    // Regression scenario for #560: forty lib/ decoys carry the same content
    // as src/gold.zig, so they tie with it on score, and the ascending-path
    // tiebreak places every decoy ahead of the gold file. A handler that
    // fetches only offset+max_results+1 ranked rows and applies path_glob
    // afterwards sees nothing but out-of-glob rows: it reports '0 results'
    // together with a 'more results' hint, although src/gold.zig satisfies
    // both the query and the glob.
    var index_arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer index_arena.deinit();
    const arena_alloc = index_arena.allocator();
    var explorer = Explorer.init(arena_alloc, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);

    // Every indexed file shares this body, so only the path differs.
    const shared_body = "pub fn x() void { _ = starveTerm; }\n";
    for (0..40) |decoy_idx| {
        const decoy_path = try std.fmt.allocPrint(arena_alloc, "lib/decoy{d:0>2}.zig", .{decoy_idx});
        try explorer.indexFile(decoy_path, shared_body);
    }
    try explorer.indexFile("src/gold.zig", shared_body);

    // Collaborators required by the dispatch entry point.
    var store = Store.init(testing.allocator);
    defer store.deinit();
    var agents = AgentRegistry.init(testing.allocator);
    defer agents.deinit();
    _ = try agents.register("__filesystem__");

    var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, ".", Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
    defer bench_ctx.deinit();

    // Search request restricted to src/** with a page size of five.
    const request_json =
        \\{"query":"starveTerm","path_glob":"src/**","max_results":5}
    ;
    const request = try std.json.parseFromSlice(std.json.Value, testing.allocator, request_json, .{});
    defer request.deinit();

    var out: std.ArrayList(u8) = .empty;
    defer out.deinit(testing.allocator);
    bench_ctx.runDispatch(
        io,
        testing.allocator,
        .codedb_search,
        &request.value.object,
        &out,
        &store,
        &explorer,
        &agents,
    );

    // The in-glob match has to appear in the response ...
    const gold_pos = std.mem.indexOf(u8, out.items, "src/gold.zig");
    try testing.expect(gold_pos != null);
    // ... and the header must not report an empty page.
    const empty_header_pos = std.mem.indexOf(u8, out.items, "0 results");
    try testing.expect(empty_header_pos == null);
}
Loading