-
Notifications
You must be signed in to change notification settings - Fork 3.2k
Expand file tree
/
Copy pathfetchUrlContent.spec.ts
More file actions
118 lines (94 loc) · 3.43 KB
/
fetchUrlContent.spec.ts
File metadata and controls
118 lines (94 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
// npx vitest core/mentions/__tests__/fetchUrlContent.spec.ts
import axios from "axios"
import { fetchUrlContent } from "../fetchUrlContent"
// Replace the axios module with a mock so the tests below can script what
// axios.get resolves or rejects with instead of performing real requests.
vi.mock("axios")
/**
 * Specs for fetchUrlContent.
 *
 * Every case queues exactly one axios.get outcome (a resolved response-like
 * object with `headers` and `data`, or a rejection) and then checks the
 * `url`, `content`, and `truncated` fields of the returned result.
 */
describe("fetchUrlContent", () => {
	// Wipe recorded calls/results on all mocks so no test sees another's history.
	beforeEach(() => {
		vi.clearAllMocks()
	})

	it("should fetch and extract text from HTML content", async () => {
		// The empty first/last entries reproduce the newlines wrapping the markup.
		const html = [
			"",
			"<html>",
			"<head><title>Test Page</title></head>",
			"<body>",
			'<script>console.log("ignore me")</script>',
			"<style>.ignore { display: none; }</style>",
			"<nav>Navigation links</nav>",
			"<main>",
			"<h1>Hello World</h1>",
			"<p>This is the main content of the page.</p>",
			"</main>",
			"<footer>Footer content</footer>",
			"</body>",
			"</html>",
			"",
		].join("\n")

		vi.mocked(axios.get).mockResolvedValueOnce({
			headers: { "content-type": "text/html; charset=utf-8" },
			data: html,
		})

		const result = await fetchUrlContent("https://example.com")

		expect(result.url).toBe("https://example.com")
		expect(result.content).toContain("Hello World")
		expect(result.content).toContain("This is the main content of the page.")

		// Script/style/nav/footer should be removed
		expect(result.content).not.toContain("ignore me")
		// The <style> body was previously unchecked even though the fixture includes it.
		expect(result.content).not.toContain("display: none")
		expect(result.content).not.toContain("Navigation links")
		expect(result.content).not.toContain("Footer content")

		expect(result.truncated).toBe(false)
	})

	it("should return raw text for non-HTML content", async () => {
		vi.mocked(axios.get).mockResolvedValueOnce({
			headers: { "content-type": "text/plain" },
			data: "Plain text content from the URL",
		})

		const result = await fetchUrlContent("https://example.com/file.txt")

		// Exact equality: plain text is expected back unmodified.
		expect(result.content).toBe("Plain text content from the URL")
		expect(result.truncated).toBe(false)
	})

	it("should handle JSON content type as raw text", async () => {
		// `data` is deliberately a string here, not a parsed object.
		vi.mocked(axios.get).mockResolvedValueOnce({
			headers: { "content-type": "application/json" },
			data: '{"key": "value"}',
		})

		const result = await fetchUrlContent("https://example.com/api/data")

		expect(result.content).toBe('{"key": "value"}')
	})

	it("should truncate content that exceeds the max length", async () => {
		// Length this test expects the returned content to be capped at.
		const expectedMaxLength = 50_000
		const longContent = "x".repeat(60_000)

		vi.mocked(axios.get).mockResolvedValueOnce({
			headers: { "content-type": "text/plain" },
			data: longContent,
		})

		const result = await fetchUrlContent("https://example.com/large")

		expect(result.truncated).toBe(true)
		expect(result.content.length).toBe(expectedMaxLength)
	})

	it("should propagate axios errors", async () => {
		vi.mocked(axios.get).mockRejectedValueOnce(new Error("Request failed with status code 404"))

		// The rejection must surface to the caller with its message intact.
		await expect(fetchUrlContent("https://example.com/not-found")).rejects.toThrow(
			"Request failed with status code 404",
		)
	})

	it("should use body as fallback when no main/article element exists", async () => {
		// The empty first/last entries reproduce the newlines wrapping the markup.
		const html = [
			"",
			"<html>",
			"<body>",
			"<div>Some body content without semantic elements</div>",
			"</body>",
			"</html>",
			"",
		].join("\n")

		vi.mocked(axios.get).mockResolvedValueOnce({
			headers: { "content-type": "text/html" },
			data: html,
		})

		const result = await fetchUrlContent("https://example.com/simple")

		expect(result.content).toContain("Some body content without semantic elements")
	})

	it("should handle missing content-type header", async () => {
		vi.mocked(axios.get).mockResolvedValueOnce({
			headers: {},
			data: "Some raw content",
		})

		const result = await fetchUrlContent("https://example.com/unknown")

		// With no content-type, it falls through to the non-HTML path
		expect(result.content).toBe("Some raw content")
	})
})