-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_improvements.py
More file actions
57 lines (48 loc) · 2.12 KB
/
test_improvements.py
File metadata and controls
57 lines (48 loc) · 2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
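#!/usr/bin/env python3
"""Test script for the improved comprehensive processor.

Runs ContentAnalyzer.extract_topics on a few sample title/content/platform
inputs and prints the extracted topics for manual inspection.
"""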
import logging
from logseq_py.pipeline.enhanced_extractors import ContentAnalyzer
# Configure the root logger at INFO level before the analyzer is built.
# NOTE(review): presumably this is to surface log output emitted by
# ContentAnalyzer -- confirm that the analyzer actually logs.
logging.basicConfig(level=logging.INFO)
# Single analyzer instance shared by every test case in the loop below.
# NOTE(review): max_topics=5 presumably caps how many topics each
# extract_topics() call returns -- confirm against ContentAnalyzer.
analyzer = ContentAnalyzer(max_topics=5)
# Sample inputs for the topic extractor. Each entry carries the three values
# that the reporting loop hands to extract_topics(): the body text
# ("content"), the document "title", and the source "platform".
# Long bodies are split with implicit string-literal concatenation; the
# resulting runtime strings are unchanged.
test_cases = [
    {
        "title": "Learn Python - Full Course for Beginners [Tutorial]",
        "content": (
            "This python tutorial for beginners teaches you python programming "
            "from scratch. Learn python fundamentals, data structures, "
            "functions, and more. Python is a powerful programming language "
            "used in data science and machine learning."
        ),
        "platform": "video",
    },
    {
        "title": "Machine Learning Basics - Deep Learning Neural Networks",
        "content": (
            "Introduction to machine learning and deep learning. Learn about "
            "neural networks, training models, and artificial intelligence. "
            "We cover supervised learning, unsupervised learning, and "
            "reinforcement learning techniques."
        ),
        "platform": "video",
    },
    {
        "title": "Attention is All you Need",
        "content": (
            "Abstract: The dominant sequence transduction models are based on "
            "complex recurrent or convolutional neural networks that include "
            "an encoder and a decoder. The best performing models also connect "
            "the encoder and decoder through an attention mechanism. We propose "
            "a new simple network architecture, the Transformer, based solely "
            "on attention mechanisms."
        ),
        "platform": "pdf",
    },
]
# Horizontal rule reused for every banner printed by this script.
SEPARATOR = "=" * 80

# Opening banner for the whole run.
print(SEPARATOR)
print("TESTING IMPROVED TOPIC EXTRACTION")
print(SEPARATOR)

# Walk the sample inputs (numbered from 1), echo each one, then print the
# topics the shared analyzer returns for it.
for case_number, case in enumerate(test_cases, start=1):
    # Per-case header.
    print(f"\n{SEPARATOR}")
    print(f"TEST CASE {case_number}")
    print(SEPARATOR)
    print(f"Title: {case['title']}")
    print(f"Platform: {case['platform']}")

    # Only the first 100 characters of the body are shown; the trailing
    # "..." is appended unconditionally.
    preview = case['content'][:100]
    print(f"\nContent preview: {preview}...")

    # Argument order is content, title, platform.
    topics = analyzer.extract_topics(case['content'], case['title'], case['platform'])

    # Numbered list of whatever the analyzer returned.
    print(f"\nExtracted Topics ({len(topics)}):")
    for rank, topic in enumerate(topics, start=1):
        print(f" {rank}. {topic}")

    # Blank line closing this case's output.
    print()

# Closing banner.
print(SEPARATOR)
print("TEST COMPLETE")
print(SEPARATOR)