-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcodebase_manager.py
154 lines (125 loc) · 6.35 KB
/
codebase_manager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os
import shutil
import sys
import subprocess
import re
import wandb_logging as wb
import globals
from datetime import datetime
import time
# Load the project settings from config.yml (resolved against the current
# working directory) once, when this module is imported.
import yaml

with open('config.yml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# When true, run_main() reports each run through wandb_logging.
wandb_enabled = config['wandb_enabled']
### Methods used by engineer, debugger and modify_codebase to build repositories
class CodebaseManager:
    """Read, write, compress and run a generated codebase on disk.

    The codebase lives in ``generated_outputs/<directory>``; ``run_main``
    starts it through the ``main.py`` file at its root.
    """

    def __init__(self, directory):
        """Compute the paths of the managed codebase.

        Args:
            directory: Folder name of the codebase, relative to
                ``generated_outputs``.
        """
        self.parent_directory = 'generated_outputs'
        self.directory = os.path.join(self.parent_directory, directory)
        self.req_file_path = os.path.join(self.directory, 'requirements.txt')

    @staticmethod
    def extract_code(chat):
        """Extract the fenced code blocks and their file names from a chat reply.

        A file is recognised as a non-whitespace token on one line, followed by
        a triple-backtick fenced block on the next non-blank line.

        Args:
            chat: The raw text of the chat response.

        Returns:
            A list of ``(path, code)`` tuples in order of appearance.
        """
        print("\033[95mExtracting code...\033[00m")

        # Get all ``` blocks and preceding filenames
        regex = r"(\S+)\n\s*```[^\n]*\n(.+?)```"
        files = []
        for match in re.finditer(regex, chat, re.DOTALL):
            # Clean the filename: drop characters that are invalid in paths,
            # then unwrap surrounding [] or backticks left over from markdown.
            path = re.sub(r"[<>'|?*]", "", match.group(1))
            path = re.sub(r"^\[(.*)\]$", r"\1", path)
            path = re.sub(r"^`(.*)`$", r"\1", path)
            path = re.sub(r"\]$", "", path)
            files.append((path, match.group(2)))
        return files

    def update_codebase(self, files):
        """Create or replace every file in ``files`` inside the codebase.

        The codebase directory and an empty ``requirements.txt`` are created
        when they do not exist yet. File names come from model output, so any
        entry that would resolve outside the codebase directory (``..``
        segments, absolute paths) is skipped with a warning instead of being
        written.

        Args:
            files: Iterable of ``(file_name, file_content)`` tuples;
                ``file_name`` is relative to the codebase directory.
        """
        os.makedirs(self.directory, exist_ok=True)

        # Create an empty requirements.txt file only if it doesn't exist
        if not os.path.exists(self.req_file_path):
            with open(self.req_file_path, "w", encoding="utf-8") as req_file:
                req_file.write("")

        base = os.path.normcase(os.path.realpath(self.directory))
        for file_name, file_content in files:
            target = os.path.join(self.directory, file_name)
            resolved = os.path.normcase(os.path.realpath(target))
            # Refuse to write anywhere but strictly below the codebase root.
            if not resolved.startswith(base + os.sep):
                print(f"Skipping file outside of codebase directory: {file_name}")
                continue

            # Create directories in the file path if they don't exist
            os.makedirs(os.path.dirname(target), exist_ok=True)
            # Explicit UTF-8 so non-ASCII generated code does not depend on
            # the platform's locale encoding.
            with open(target, "w", encoding="utf-8") as file:
                file.write(file_content)

    def compress_codebase(self):
        """Return the whole codebase as one compact, token-saving string.

        Every file that is not ignored contributes a ``----<relative path>----``
        header line followed by its content with each run of whitespace
        (including line breaks) collapsed to a single space. Directories and
        files are visited in sorted order so the result is deterministic.
        Files that cannot be read or decoded are reported on stdout and
        skipped.

        Returns:
            The per-file sections joined with newlines.
        """
        ignore_list = ['filename_to_ignore.py', 'another_file_to_ignore.yml']
        ignore_endings = ('.pycache', '.pyc')
        result_content = []

        for root, dirs, files in os.walk(self.directory):
            dirs.sort()  # in-place sort steers os.walk's traversal order
            for filename in sorted(files):
                if filename in ignore_list or filename.endswith(ignore_endings):
                    continue

                filepath = os.path.join(root, filename)
                try:
                    with open(filepath, 'r', encoding='utf-8') as infile:
                        # split() + join collapses newlines and indentation to
                        # single spaces without gluing adjacent lines together.
                        content = ' '.join(infile.read().split())
                except (OSError, UnicodeError) as e:
                    print(f"Error processing file {filepath}: {e}")
                    continue

                # Get the relative path of the file
                relative_filepath = os.path.relpath(filepath, self.directory)
                result_content.append(f"----{relative_filepath}----\n{content}")

        return "\n".join(result_content)

    def list_dependencies(self):
        """Return the lines of the codebase's ``requirements.txt``.

        Returns:
            A list with one string per line of the file.

        Raises:
            FileNotFoundError: If ``requirements.txt`` does not exist.
        """
        with open(self.req_file_path, 'r', encoding='utf-8') as f:
            return f.read().splitlines()

    def run_main(self):
        """Execute the codebase's ``main.py`` and report how it ended.

        The script runs in a separate process started with ``subprocess.Popen``.
        Its stdout and stdin are connected to this process's ``sys.stdout`` and
        ``sys.stdin``, so output appears in real time and prompts for user
        input can be answered. Its stderr is captured and returned so the
        caller can handle errors. When ``wandb_enabled`` is set, the run is
        also logged through ``wb.wandb_log_tool``.

        Returns:
            tuple: A tuple containing two items:
                - The return code (exit status) of the subprocess. A value of 0
                  typically indicates successful execution, while any other
                  value suggests an error.
                - The text captured from the stderr of the subprocess.
        """
        start_time_ms = round(datetime.now().timestamp() * 1000)

        # Prefer 'python3' when it is available in PATH, else fall back.
        if shutil.which('python3') is not None:
            python_command = 'python3'
        else:
            python_command = 'python'

        # stdout/stdin are inherited from this process so the script stays
        # interactive; stderr=subprocess.PIPE captures the error stream.
        process = subprocess.Popen(
            [python_command, os.path.join(self.directory, "main.py")],
            text=True,
            bufsize=1,
            stdout=sys.stdout,  # Connect subprocess's stdout directly to the terminal
            stdin=sys.stdin,  # Connect subprocess's stdin directly to the terminal
            stderr=subprocess.PIPE
        )

        # Wait for the subprocess to finish and capture any error message from the stderr.
        _, stderr = process.communicate()

        if wandb_enabled:
            wb.wandb_log_tool(
                tool_name="run_main",
                start_time_ms=start_time_ms,
                inputs={"python_command": python_command,
                        "run_script_path": os.path.join(self.directory, "main.py")},
                # NOTE: process.stdout is None here because stdout is not
                # piped; only stderr carries captured text.
                outputs={"stdout": process.stdout,
                         "stderr": stderr},
                status="success" if process.returncode == 0 else "error",
                parent=globals.llm_span
            )

        return process.returncode, stderr