|
| 1 | +import { describe, it, expect, beforeEach } from "vitest"; |
| 2 | +import { HybridSearchService, reciprocalRankFusion } from "../hybridSearch.js"; |
| 3 | +import { KeywordSearchEngine } from "../keywordSearch.js"; |
| 4 | +import { SemanticSearchService } from "../semanticSearch.js"; |
| 5 | +import { EmbeddingService } from "../embeddings.js"; |
| 6 | +import { VectorStore } from "../vectorStore.js"; |
| 7 | +import type { EmbedFn } from "../embeddings.js"; |
| 8 | +import type { SearchResult, TextChunk } from "../types.js"; |
| 9 | + |
| 10 | + |
/**
 * Topic-aware mock embedder — mirrors semanticSearch.test.ts convention.
 *
 * Each recognised topic owns two adjacent vector slots (0.8 and 0.6). A text
 * mentioning several topics lights up every matching pair. The vector is then
 * L2-normalised; texts that match no topic yield the all-zero vector.
 *
 * @param dimensions Length of every returned vector (defaults to 384).
 * @returns An embed function producing one vector per input text, in order.
 */
function createMockEmbedFn(dimensions: number = 384): EmbedFn {
  // Trigger phrases (matched case-insensitively) and the first slot of each topic pair.
  const topics: ReadonlyArray<{ keywords: readonly string[]; slot: number }> = [
    { keywords: ["machine learning", "neural"], slot: 0 },
    { keywords: ["cooking", "recipe"], slot: 2 },
    { keywords: ["typescript", "javascript"], slot: 4 },
  ];

  return async (texts: string[]): Promise<number[][]> =>
    texts.map((text) => {
      const vector: number[] = new Array<number>(dimensions).fill(0);
      const lower = text.toLowerCase();

      for (const { keywords, slot } of topics) {
        if (!keywords.some((keyword) => lower.includes(keyword))) continue;
        // Guard each write: assigning past the end would silently grow the
        // array, so a small `dimensions` would yield vectors of mixed length.
        if (slot < dimensions) vector[slot] = 0.8;
        if (slot + 1 < dimensions) vector[slot + 1] = 0.6;
      }

      const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
      // Leave the zero vector untouched to avoid dividing by zero.
      return norm > 0 ? vector.map((v) => v / norm) : vector;
    });
}
| 31 | + |
/**
 * Fabricates a SearchResult around a placeholder chunk.
 *
 * The chunk id is `<noteId>:<chunkIndex>` and its content is always "dummy".
 *
 * @param noteId Note the chunk is attributed to.
 * @param chunkIndex Position of the chunk within that note.
 * @param score Score to attach to the result.
 * @returns The result pairing the fabricated chunk with `score`.
 */
function makeResult(noteId: string, chunkIndex: number, score: number): SearchResult {
  return {
    chunk: {
      id: `${noteId}:${chunkIndex}`,
      noteId,
      content: "dummy",
      chunkIndex,
    },
    score,
  };
}
| 41 | + |
/**
 * Assembles a HybridSearchService from freshly constructed collaborators.
 *
 * The service is returned as constructed; this function does not call
 * `initialize()` on it.
 *
 * @param embedFn Optional embedder override; when omitted (or null) the
 *   default `createMockEmbedFn()` is used.
 * @returns A HybridSearchService over a new semantic service and keyword engine.
 */
function buildService(embedFn?: EmbedFn) {
  const embedder = embedFn ?? createMockEmbedFn();
  const semanticSearch = new SemanticSearchService(
    new EmbeddingService({}, embedder),
    // NOTE(review): every call uses the same fixed persist location — if
    // VectorStore reads or writes it, tests may share state; confirm.
    new VectorStore({
      persistDir: "/tmp/hybrid-test",
      indexFilename: "test-index.json",
    }),
  );
  return new HybridSearchService(semanticSearch, new KeywordSearchEngine());
}
| 52 | + |
/** Unit tests for reciprocalRankFusion, driven by hand-built ranked lists. */
describe("reciprocalRankFusion", () => {
  it("returns empty for empty inputs", () => {
    expect(reciprocalRankFusion([])).toEqual([]);
    expect(reciprocalRankFusion([{ results: [], weight: 1 }])).toEqual([]);
  });

  it("accumulates scores for items appearing in multiple lists", () => {
    const list1 = [makeResult("n1", 0, 0.9), makeResult("n2", 0, 0.5)];
    const list2 = [makeResult("n2", 0, 0.8), makeResult("n3", 0, 0.4)];
    const fused = reciprocalRankFusion(
      [{ results: list1, weight: 1 }, { results: list2, weight: 1 }],
      60,
    );

    // Guarded lookup: a missing note fails with a readable message instead of
    // a TypeError from dereferencing `undefined`.
    const scoreOf = (noteId: string): number => {
      const hit = fused.find((r) => r.chunk.noteId === noteId);
      if (hit === undefined) {
        throw new Error(`expected ${noteId} to be present in fused results`);
      }
      return hit.score;
    };

    // n2 appears in both lists, n1 in only one of them.
    expect(scoreOf("n2")).toBeGreaterThan(scoreOf("n1"));
  });

  it("includes items from disjoint lists", () => {
    const list1 = [makeResult("n1", 0, 0.9)];
    const list2 = [makeResult("n2", 0, 0.9)];
    const fused = reciprocalRankFusion(
      [{ results: list1, weight: 1 }, { results: list2, weight: 1 }],
    );
    const noteIds = fused.map((r) => r.chunk.noteId);
    expect(noteIds).toContain("n1");
    expect(noteIds).toContain("n2");
  });

  it("respects weights — higher-weight list dominates when items differ", () => {
    // n1 is rank-1 in the low-weight list; n2 is rank-1 in the high-weight list
    const lowWeight = [makeResult("n1", 0, 0.99)];
    const highWeight = [makeResult("n2", 0, 0.99)];
    const fused = reciprocalRankFusion(
      [{ results: lowWeight, weight: 0.1 }, { results: highWeight, weight: 10 }],
      60,
    );
    // Optional chaining: an empty result fails the assertion, not the access.
    expect(fused[0]?.chunk.noteId).toBe("n2");
  });
});
| 94 | + |
/**
 * Integration-style tests for HybridSearchService. Each test gets a service
 * built by buildService() and initialised before it runs.
 */
describe("HybridSearchService", () => {
  let service: HybridSearchService;

  beforeEach(async () => {
    service = buildService();
    await service.initialize();
  });

  describe("indexNote + search round-trip", () => {
    it("finds a note after indexing", async () => {
      await service.indexNote("ml-note", "machine learning and neural networks");
      const results = await service.search("machine learning");
      const noteIds = results.map((r) => r.chunk.noteId);
      expect(noteIds).toContain("ml-note");
    });

    it("returns results from keyword-only match", async () => {
      // Index a note whose content won't produce a high semantic score
      // (all zeros in mock embedder), but has exact keyword matches.
      await service.indexNote("kw-note", "zygomorphic floriculture botany");
      const results = await service.search("zygomorphic floriculture", {
        topK: 5,
      });
      const noteIds = results.map((r) => r.chunk.noteId);
      expect(noteIds).toContain("kw-note");
    });

    it("a note matching both modalities ranks at the top", async () => {
      await service.indexNote(
        "both-note",
        "machine learning neural networks deep learning",
      );
      await service.indexNote("kw-only-note", "zygomorphic floriculture exotic");
      const results = await service.search("machine learning");
      // Optional chaining: an empty result list fails the assertion itself
      // rather than throwing a TypeError on `results[0]`.
      expect(results[0]?.chunk.noteId).toBe("both-note");
    });
  });

  describe("search options", () => {
    // Seed one note per mock-embedder topic for the option tests below.
    beforeEach(async () => {
      await service.indexNote("ml-note", "machine learning and neural networks");
      await service.indexNote("cooking-note", "pasta cooking recipe dinner");
      await service.indexNote("ts-note", "typescript javascript programming");
    });

    it("respects topK", async () => {
      const results = await service.search("general query text", { topK: 2 });
      // NOTE(review): this bound also holds for an empty result list, so the
      // test passes vacuously if the query matches nothing — confirm intent.
      expect(results.length).toBeLessThanOrEqual(2);
    });

    it("filters results by noteId", async () => {
      const results = await service.search("any query", {
        noteId: "cooking-note",
        topK: 10,
      });
      expect(results.length).toBeGreaterThan(0);
      expect(results.every((r) => r.chunk.noteId === "cooking-note")).toBe(true);
    });

    it("zeroing semanticWeight degrades to keyword-only ranking", async () => {
      const results = await service.search("machine learning", {
        semanticWeight: 0,
        keywordWeight: 1,
        topK: 5,
      });
      const noteIds = results.map((r) => r.chunk.noteId);
      expect(noteIds).toContain("ml-note");
    });

    it("zeroing keywordWeight degrades to semantic-only ranking", async () => {
      const results = await service.search("machine learning", {
        semanticWeight: 1,
        keywordWeight: 0,
        topK: 5,
      });
      // Guarded access so an empty result surfaces as an assertion failure.
      expect(results[0]?.chunk.noteId).toBe("ml-note");
    });
  });

  describe("removeNote", () => {
    it("removed note does not appear in results", async () => {
      await service.indexNote("ml-note", "machine learning neural networks");
      // NOTE(review): removeNote is not awaited here — confirm it is
      // synchronous, otherwise the search below may race with the removal.
      service.removeNote("ml-note");
      const results = await service.search("machine learning", { topK: 10 });
      expect(results.every((r) => r.chunk.noteId !== "ml-note")).toBe(true);
    });
  });

  describe("empty index", () => {
    it("returns empty array without crashing", async () => {
      const results = await service.search("anything");
      expect(results).toEqual([]);
    });
  });
});
0 commit comments