# Source: openevolve/tests/test_novelty_asyncio_issue.py at 24b8eca9aecacb76be90232a568758f7e4423db6 (algorithmicsuperintelligence/openevolve, GitHub)
"""
Test for issue #313: asyncio.run() error in novelty checking
https://github.com/algorithmicsuperintelligence/openevolve/issues/313

These tests guard against a regression of the bug where calling database.add() from
within an async context triggers a novelty check that uses asyncio.run(), which fails
because an event loop is already running.
"""

import unittest
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch, Mock
from openevolve.config import Config
from openevolve.database import Program, ProgramDatabase


class MockLLM:
    """Stand-in LLM exposing a single async generation method with a canned reply."""

    async def generate_with_context(self, system_message: str, messages: list):
        """Ignore both arguments and always answer with the fixed verdict "NOVEL"."""
        verdict = "NOVEL"
        return verdict


class TestNoveltyAsyncioIssue(unittest.TestCase):
    """Regression tests for the asyncio.run() error in novelty checking (issue #313).

    Every test adds two programs through ``ProgramDatabase.add()`` and verifies
    that the calls complete and that both programs end up in ``db.programs``,
    whether ``add()`` is invoked from inside a running event loop or from plain
    synchronous code.
    """

    @patch('openevolve.embedding.EmbeddingClient')
    def setUp(self, mock_embedding_client_class):
        """Set up test database with novelty checking enabled.

        Args:
            mock_embedding_client_class: Mock that replaces
                ``openevolve.embedding.EmbeddingClient``; injected by the
                ``@patch`` decorator and active only while ``setUp`` runs.
        """
        # Every embedding request yields the same vector, so any two programs
        # look maximally similar to each other.
        mock_instance = MagicMock()
        mock_instance.get_embedding.return_value = [0.1] * 1536  # Mock embedding vector
        mock_embedding_client_class.return_value = mock_instance

        config = Config()
        config.database.in_memory = True
        config.database.embedding_model = "text-embedding-3-small"
        config.database.similarity_threshold = 0.99
        config.database.novelty_llm = MockLLM()

        self.db = ProgramDatabase(config.database)
        # Kept on the instance so tests can inspect the patched class if needed.
        self.mock_embedding_client_class = mock_embedding_client_class

    def test_novelty_check_from_async_context_works(self):
        """
        Test that novelty checking works correctly when called from within
        an async context (this was the bug in issue #313).

        Expected behavior: Should successfully run the novelty check without
        any asyncio.run() errors, properly using ThreadPoolExecutor to handle
        the async LLM call from within a running event loop.
        """
        # Create two programs with similar embeddings to trigger LLM novelty check
        program1 = Program(
            id="prog1",
            code="def test(): return 1",
            language="python",
            metrics={"score": 0.5},
        )

        program2 = Program(
            id="prog2",
            code="def test(): return 2",
            language="python",
            metrics={"score": 0.6},
            parent_id="prog1",
        )

        async def async_add_programs():
            """Add programs from async context - this simulates controller.run()"""
            # Add first program (no novelty check, no similar programs yet)
            prog1_id = self.db.add(program1)
            self.assertIsNotNone(prog1_id)

            # Add second program - this triggers novelty check
            # Since embeddings are similar (both [0.1] * 1536), it will call
            # _llm_judge_novelty which should now work correctly
            prog2_id = self.db.add(program2)

            # The novelty check should succeed without errors
            # The program should be added (MockLLM returns "NOVEL")
            self.assertIsNotNone(prog2_id)

            return True

        # asyncio.run() provides the running event loop that add() is called under;
        # any exception raised inside the coroutine propagates and fails the test.
        result = asyncio.run(async_add_programs())
        self.assertTrue(result)

        # Verify both programs were added
        self.assertIn("prog1", self.db.programs)
        self.assertIn("prog2", self.db.programs)

    def test_novelty_check_from_sync_context_works(self):
        """
        Test that novelty checking also works correctly when called from
        a synchronous (non-async) context.

        Expected behavior: Should successfully run the novelty check using
        asyncio.run() since there's no running event loop.
        """
        # Create two programs with similar embeddings to trigger LLM novelty check
        program1 = Program(
            id="prog3",
            code="def test(): return 3",
            language="python",
            metrics={"score": 0.5},
        )

        program2 = Program(
            id="prog4",
            code="def test(): return 4",
            language="python",
            metrics={"score": 0.6},
            parent_id="prog3",
        )

        # Add programs from synchronous context (no event loop running)
        prog1_id = self.db.add(program1)
        self.assertIsNotNone(prog1_id)

        prog2_id = self.db.add(program2)
        self.assertIsNotNone(prog2_id)

        # Verify both programs were added
        self.assertIn("prog3", self.db.programs)
        self.assertIn("prog4", self.db.programs)

    def test_novelty_check_disabled_works_fine(self):
        """
        Test that when novelty checking is disabled, adding programs
        from async context works fine (this is the workaround from issue #313).
        """
        # Create a new database with novelty checking disabled
        config = Config()
        config.database.in_memory = True
        config.database.similarity_threshold = 0.0  # Disable novelty checking
        db_no_novelty = ProgramDatabase(config.database)

        program1 = Program(
            id="prog1",
            code="def test(): return 1",
            language="python",
            metrics={"score": 0.5},
        )

        program2 = Program(
            id="prog2",
            code="def test(): return 2",
            language="python",
            metrics={"score": 0.6},
        )

        async def async_add_programs():
            """Add programs from async context"""
            db_no_novelty.add(program1)
            db_no_novelty.add(program2)
            return True

        # This should work fine without novelty checking
        result = asyncio.run(async_add_programs())
        self.assertTrue(result)

        # Verify both programs were added, mirroring the checks in the other tests
        self.assertIn("prog1", db_no_novelty.programs)
        self.assertIn("prog2", db_no_novelty.programs)


# Run this module's tests via unittest when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()