# python-tddpp/main.py at 9c90fcf04c94ca6a10b2caf2bcfc374c8e4b1e4c · JamesVorder/python-tddpp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import os

from langroid import ChatAgentConfig

from lib.utils import CodeGenSandbox
from lib.agents import CodeGenAgent, TestInterpreterAgent, GenericAgent
import typer

import langroid as lr
from langroid.utils.configuration import set_global, Settings
from langroid.utils.logging import setup_colored_logging

from TestRunner.GenericTestRunner import GenericTestRunner, SubProcessTestRunner

# Typer application object; CLI commands are registered on it via @app.command().
app = typer.Typer()
# Runs once at import time, before any command executes
# (presumably configures langroid's colored log output -- see langroid.utils.logging).
setup_colored_logging()


def chat(
        code_gen_agent: GenericAgent,
        test_interpreter: GenericAgent,
        test_runner: GenericTestRunner,
        max_epochs: int = 5
) -> None:
    """Drive the generate -> test -> interpret -> regenerate loop.

    The code generator is first asked to fill in its class skeleton. Each
    epoch then runs the tests; on failure the test interpreter is asked for
    insights and the code generator is asked for a corrected attempt. Every
    generated attempt is tested, including the last one.

    How an agent's response reaches the files exercised by the test runner is
    not visible here (presumably ``respond`` writes it into the sandbox).

    Args:
        code_gen_agent: Agent that produces code; must expose
            ``class_skeleton``, ``respond(prompt=...)`` and the ``set_*``
            methods used below.
        test_interpreter: Agent that turns raw test output into insights.
        test_runner: Runner whose ``run()`` returns ``(exit_code, results)``;
            an exit code of ``0`` is treated as "all tests pass".
        max_epochs: Maximum number of code attempts to generate and test
            before giving up.

    Returns:
        None. Progress and the final outcome are printed to stdout.
    """
    code_attempt = code_gen_agent.respond(
        prompt=f"""
            You are an expert at writing Python code.
            Fill in the following class skeleton.
            Do NOT add any other methods or commentary.
            Your response should be ONLY the python code.
            DO NOT ADD ``` IN THE BEGINNING OR END
            DO NOT ADD python COMMAND IN THE BEGINNING
            Keep Imported Modules or Classes
            Do not say 'here is the python code'
            Do not surround your response with quotes or backticks.
            DO NOT EVER USE ``` in your output.
            You should maintain any comments that were provided in the class skeleton.
            You should maintain the exact method signatures provided in the class skeleton.
            Your output MUST be valid, runnable python code and NOTHING else.
            Your output MUST NOT include any usage of testing tools like unittest, pytest, mock, etc.
            {code_gen_agent.class_skeleton}
        """
    )
    solved = False
    # One epoch per code attempt: the initial attempt plus up to
    # max_epochs - 1 corrections, each of which gets tested.
    for epoch in range(max_epochs):
        test_exit_code, test_results = test_runner.run()
        print(test_results)
        if test_exit_code == 0:
            solved = True
            print("Done!")
            break
        elif epoch == max_epochs - 1:
            # Out of attempts: skip the LLM round-trips, since a further
            # attempt would never be tested.
            break
        else:
            test_interpreter.set_latest_test_results(test_results)
            test_interpreter.set_latest_test_exit_code(test_exit_code)
            results_insights = test_interpreter.respond(
                prompt=f"""
                You are an expert at interpreting the results of unit tests, and providing insight into what they mean.
                You should be descriptive about what variables are incorrect, and in what way.
                You should include information about which methods should be modified, and in what way (without providing code.)
                You should not provide code.
                You should NEVER attempt to modify the tests, or give advice to modify the tests.
                Give results in a bulleted list, with one bullet for each method that fails tests.
                Keep insights very brief, providing a maximum of 3 sentences about each method that failed a test.
                If there are any ```, suggest their removal
                If there is a ```python, suggest its removal
                Please provide insights about the following test results:
                {test_interpreter.latest_test_results}
                The test run finished with the following exit code:
                {test_interpreter.latest_test_exit_code}
                Those results were produced by the following code:
                {code_attempt}
                """
            )
            code_gen_agent.set_previous_code_attempt(code_attempt)
            code_gen_agent.set_latest_test_result(test_results)
            code_gen_agent.set_latest_test_result_interpretation(results_insights)
            code_attempt = code_gen_agent.respond(
                prompt=f"""
                You are an expert at writing Python code.
                Consider the following code, and the following test results.
                Here is the code:
                {code_gen_agent.previous_code_attempt}
                Here are the test results:
                {code_gen_agent.latest_test_result}
                In addition, you may consider these insights about the test results when coming up with your solution:
                {code_gen_agent.latest_test_result_interpretation}
                Update the code so that the tests will pass.
                DO NOT ADD ``` IN THE BEGINNING OR END
                DO NOT ADD python COMMAND IN THE BEGINNING
                REMOVE ``` IN THE BEGINNING OR END
                REMOVE python COMMAND IN THE BEGINNING
                Any output strings should match test output
                Your output MUST contain all the same classes and methods as the input code.
                CORRECT ONLY ONE METHOD AT A TIME
                Do NOT add any other methods or commentary.
                Your response should be ONLY the python code.
                Do not say 'here is the python code'
                Do not surround your response with quotes or backticks.
                DO NOT EVER USE ``` in your output.
                Your response should NEVER start or end with ```
                You should maintain any comments that were provided in the code.
                You should maintain the exact method signatures provided in the code.
                Your output MUST be valid, runnable python code and NOTHING else.
                Your output MUST NOT include any usage of testing tools like unittest, pytest, mock, etc.
                """
            )
    if not solved:
        print(f"Reached the end of epoch {max_epochs} without finding a solution :(")


@app.command()
def main(
    debug: bool = typer.Option(False, "--debug", "-d", help="debug mode"),
    no_stream: bool = typer.Option(False, "--nostream", "-ns", help="no streaming"),
    nocache: bool = typer.Option(False, "--nocache", "-nc", help="don't use cache"),
    project_dir: str = typer.Argument(
        default=".",
        help="The project directory that contains your tests and class skeleton. "
             "This directory may also have other contents. "
             "The directory you give here will be cloned into a 'sandbox' for the code generator to operate in."
    ),
    class_skeleton_path: str = typer.Argument(
        default=os.path.join("assets", "test_class.py"),
        help="Path to the class skeleton file, relative to project_dir."
    ),
    test_path: str = typer.Argument(
        default=os.path.join(".", "test"),
        help="Path to the test file or directory, relative to project_dir."
    ),
    sandbox_path: str = typer.Option(
        "./build", "--sandbox-path", "-s",
        # Trailing space matters: adjacent literals are concatenated verbatim.
        help="You may optionally specify a location for the sandbox in which the code generator operates. "
             "Default: ./build"
    ),
    max_epochs: int = typer.Option(
        5, "--max-epochs", "-n",
        help="The maximum number of times to let the code generator try "
             "before giving up."
    )
) -> None:
    """Generate code for a class skeleton and iterate on it until the project's tests pass."""
    # NOTE: Typer shows the docstring above as the command description in --help,
    # so it is kept short and user-facing.

    # Apply the CLI flags to langroid's global settings; the negative flags
    # (--nocache / --nostream) are inverted into the positive settings.
    set_global(
        Settings(
            debug=debug,
            cache=not nocache,
            stream=not no_stream,
        )
    )

    # Single agent configuration shared by both agents below.
    # To target a local Ollama model instead, use
    # chat_model="ollama/llama3.1:latest" with chat_context_length=128000.
    agent_config = ChatAgentConfig(
        llm=lr.language_models.OpenAIGPTConfig(
            chat_model="litellm/gemini/gemini-1.5-flash-latest",
            chat_context_length=1000000
        ),
        vecdb=None
    )

    # The sandbox is initialised before the agents and the runner are built on it.
    sandbox = CodeGenSandbox(project_dir, class_skeleton_path, test_path, sandbox_path)
    sandbox.init_sandbox()
    code_generator: GenericAgent = CodeGenAgent(sandbox, agent_config)
    test_interpreter: GenericAgent = TestInterpreterAgent(sandbox, agent_config)
    test_runner: GenericTestRunner = SubProcessTestRunner(sandbox)
    chat(code_generator, test_interpreter, test_runner, max_epochs)


# Invoke the Typer CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    app()