diff --git a/gpt_engineer/core/diff.py b/gpt_engineer/core/diff.py index ef386daa74..d3b70f2e24 100644 --- a/gpt_engineer/core/diff.py +++ b/gpt_engineer/core/diff.py @@ -145,10 +145,13 @@ def find_start_line(self, lines_dict: dict, problems: list) -> bool: if is_similar(line[1], line_content) and line[1] != "": start_line = line_number - 1 break - # if the start line is not found, this should be a comment from LLM + # if the start line is not found, append a problem message if start_line is None: - self.relabel_line(index, ADD) - continue + problems.append( + f"In {self.hunk_to_string()}:can not find the starting line of the diff" + ) + return False + else: # the line prior to the start line is found now we insert it to the first place as the start line self.start_line_pre_edit = start_line @@ -323,6 +326,8 @@ def __init__(self, filename_pre, filename_post) -> None: def is_new_file(self) -> bool: """Determines if the diff represents a new file.""" + if self.filename_pre == "/dev/null": + return True return any(hunk.is_new_file for hunk in self.hunks) def diff_to_string(self) -> str: diff --git a/gpt_engineer/preprompts/file_format_diff b/gpt_engineer/preprompts/file_format_diff index ec5c808625..bcf34e2921 100644 --- a/gpt_engineer/preprompts/file_format_diff +++ b/gpt_engineer/preprompts/file_format_diff @@ -36,3 +36,4 @@ Example of a git diff creating a new file: -LINES TO BE REMOVED (starting with single -) AND LINES TO BE RETAIN (no starting symbol) HAVE TO REPLICATE THE DIFFED HUNK OF THE CODE EXACTLY LINE BY LINE. KEEP THE NUMBER OF RETAIN LINES SMALL IF POSSIBLE. -EACH LINE IN THE SOURCE FILES STARTS WITH A LINE NUMBER, WHICH IS NOT PART OF THE SOURCE CODE. NEVER TRANSFER THESE LINE NUMBERS TO THE DIFF HUNKS. -AVOID STARTING A HUNK WITH AN EMPTY LINE. +-ALL DIFFS WILL BE APPLIED SIMULTANEOUSLY, AVOID SEQUENTIAL HUNKS. diff --git a/tests/core/improve_function_test_cases/apps_benchmark_6_chat b/tests/core/improve_function_test_cases/apps_benchmark_6_chat new file mode 100644 index 0000000000..09b47815ef --- /dev/null +++ b/tests/core/improve_function_test_cases/apps_benchmark_6_chat @@ -0,0 +1,65 @@ +To implement this program, we will follow a structured approach. We will create a Python script that takes command-line arguments for the inputs, parses them, and then processes each query to determine the minimum number of blows required to defeat Zmei Gorynich. The program will consist of a main function that handles input parsing and output formatting, and a separate function that calculates the minimum number of blows for each query. + +We will need to: +1. Parse the command-line arguments to extract the queries. +2. For each query, calculate the minimum number of blows required. +3. Print the result for each query. + +Let's start by creating the `main.py` file with the necessary imports and the skeleton of our program. + +```diff +--- /dev/null ++++ main.py +@@ -0,0 +1,36 @@ ++import sys ++ ++def calculate_min_blows(n, x, blows): ++ """ ++ Calculate the minimum number of blows to defeat Zmei Gorynich. ++ :param n: Number of types of blows. ++ :param x: Initial number of heads. ++ :param blows: List of tuples (d_i, h_i) for each blow type. ++ :return: Minimum number of blows or -1 if impossible. ++ """ ++ # Implementation will be added here ++ pass ++ ++def main(): ++ # Parse command-line arguments ++ args = sys.argv[1].split() ++ t = int(args[0]) ++ queries = [] ++ idx = 1 ++ for _ in range(t): ++ n, x = map(int, args[idx:idx+2]) ++ idx += 2 ++ blows = [tuple(map(int, args[idx:idx+2])) for _ in range(n)] ++ idx += 2*n ++ queries.append((n, x, blows)) ++ ++ # Process each query ++ for n, x, blows in queries: ++ result = calculate_min_blows(n, x, blows) ++ print(result) ++ ++if __name__ == "__main__": ++ main() +``` +Now, let's implement the logic in calculate_min_blows function to determine the minimum number of blows required to defeat Zmei Gorynich. +```diff +--- main.py ++++ main.py +@@ -8,7 +8,25 @@ ++ # Implementation will be added here ++ max_damage = max(blows, key=lambda x: x[0]) ++ max_diff = max(blows, key=lambda x: x[0] - x[1]) ++ if max_damage[0] >= x: ++ return 1 ++ if max_diff[0] <= max_diff[1] and max_damage[0] < x: ++ return -1 ++ effective_damage = max_diff[0] - max_diff[1] ++ remaining_heads = x - max_damage[0] ++ blows_required = 1 + (remaining_heads + effective_damage - 1) // effective_damage ++ return blows_required +- pass +``` diff --git a/tests/core/improve_function_test_cases/apps_benchmark_6_code b/tests/core/improve_function_test_cases/apps_benchmark_6_code new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/core/improve_function_test_cases/apps_benchmark_6_v2_chat b/tests/core/improve_function_test_cases/apps_benchmark_6_v2_chat new file mode 100644 index 0000000000..72812bf1fc --- /dev/null +++ b/tests/core/improve_function_test_cases/apps_benchmark_6_v2_chat @@ -0,0 +1,17 @@ +```diff +--- main.py ++++ main.py +@@ -8,7 +8,25 @@ ++ # Implementation will be added here ++ max_damage = max(blows, key=lambda x: x[0]) ++ max_diff = max(blows, key=lambda x: x[0] - x[1]) ++ if max_damage[0] >= x: ++ return 1 ++ if max_diff[0] <= max_diff[1] and max_damage[0] < x: ++ return -1 ++ effective_damage = max_diff[0] - max_diff[1] ++ remaining_heads = x - max_damage[0] ++ blows_required = 1 + (remaining_heads + effective_damage - 1) // effective_damage ++ return blows_required +- pass +``` diff --git a/tests/core/improve_function_test_cases/apps_benchmark_6_v2_code b/tests/core/improve_function_test_cases/apps_benchmark_6_v2_code new file mode 100644 index 0000000000..97ff66cdee --- /dev/null +++ b/tests/core/improve_function_test_cases/apps_benchmark_6_v2_code @@ -0,0 +1,35 @@ +import sys + +def calculate_min_blows(n, x, blows): + """ + Calculate the minimum number of blows to defeat Zmei Gorynich. + :param n: Number of types of blows. + :param x: Initial number of heads. + :param blows: List of tuples (d_i, h_i) for each blow type. + :return: Minimum number of blows or -1 if impossible. + """ + # Implementation will be added here + pass + +def main(): + # Parse command-line arguments + args = sys.argv[1].split() + t = int(args[0]) + queries = [] + idx = 1 + for _ in range(t): + n, x = map(int, args[idx:idx+2]) + idx += 2 + blows = [tuple(map(int, args[idx:idx+2])) for _ in range(n)] + idx += 2*n + queries.append((n, x, blows)) + + # Process each query + for n, x, blows in queries: + result = calculate_min_blows(n, x, blows) + print(result) + +if __name__ == "__main__": + main()""" + +print(code) diff --git a/tests/core/chat_to_files_test_cases/controller_diff b/tests/core/improve_function_test_cases/controller_chat similarity index 100% rename from tests/core/chat_to_files_test_cases/controller_diff rename to tests/core/improve_function_test_cases/controller_chat diff --git a/tests/core/chat_to_files_test_cases/controller_code b/tests/core/improve_function_test_cases/controller_code similarity index 100% rename from tests/core/chat_to_files_test_cases/controller_code rename to tests/core/improve_function_test_cases/controller_code diff --git a/tests/core/chat_to_files_test_cases/corrected_diff_from_missing_lines b/tests/core/improve_function_test_cases/corrected_diff_from_missing_lines similarity index 100% rename from tests/core/chat_to_files_test_cases/corrected_diff_from_missing_lines rename to tests/core/improve_function_test_cases/corrected_diff_from_missing_lines diff --git a/tests/core/improve_function_test_cases/create_two_new_files_chat b/tests/core/improve_function_test_cases/create_two_new_files_chat new file mode 100644 index 0000000000..715f96cb0e --- /dev/null +++ b/tests/core/improve_function_test_cases/create_two_new_files_chat @@ -0,0 +1,99 @@ +To implement the requested functionality, we'll create a Python program that processes the input, evaluates the service history of a car, and outputs the appropriate message based on the criteria provided. We'll structure the program into several parts: + +1. **Parsing Input**: A function to parse the input string into a structured format. +2. **Service History Evaluation**: A class to evaluate the service history entries and determine the status of the odometer and service frequency. +3. **Main Logic**: The main logic that ties everything together and outputs the final result. + +Let's start by creating the necessary files and writing the initial code. + +### File: requirements.txt +This file will list the dependencies required for the project. Since the project does not require external libraries, this file will be empty but included for completeness. + +```diff +``` + +### File: service_history_evaluator.py +This file will contain the class responsible for evaluating the service history. + +```diff +--- /dev/null ++++ service_history_evaluator.py +@@ -0,0 +1,47 @@ ++from datetime import datetime, timedelta ++ ++class ServiceHistoryEvaluator: ++ """ ++ Evaluates the service history of a car to determine if the odometer has been tampered with ++ or if the service frequency is insufficient. ++ """ ++ def __init__(self, entries): ++ self.entries = entries ++ ++ def evaluate(self): ++ # Placeholder for the evaluation logic ++ pass ++ ++ def _parse_date(self, year, month): ++ """ ++ Parses the year and month into a datetime object. ++ """ ++ return datetime(year, month, 1) ++ ++ def _calculate_distance(self, start, end): ++ """ ++ Calculates the minimum and maximum possible distance traveled between two services. ++ """ ++ months_diff = (end.year - start.year) * 12 + end.month - start.month ++ min_distance = 2000 * months_diff ++ max_distance = 20000 * months_diff ++ return min_distance, max_distance ++ ++ def _check_odometer_rollover(self, prev_reading, current_reading, max_distance): ++ """ ++ Checks if the odometer could have rolled over between two readings. ++ """ ++ if prev_reading > current_reading: ++ return current_reading + (99999 - prev_reading) <= max_distance ++ else: ++ return current_reading - prev_reading <= max_distance ++ ++ def _check_service_frequency(self): ++ """ ++ Checks if the service frequency is within the legal limits. ++ """ ++ # Placeholder for the service frequency check logic ++ pass +``` + +### File: main.py +This file will contain the main logic and the entry point of the program. + +```diff +--- /dev/null ++++ main.py +@@ -0,0 +1,29 @@ ++import sys ++from service_history_evaluator import ServiceHistoryEvaluator ++ ++def parse_input(input_str): ++ """ ++ Parses the input string into a structured format. ++ """ ++ lines = input_str.split('\n') ++ num_entries = int(lines[0]) ++ entries = [tuple(map(int, line.split())) for line in lines[1:num_entries+1]] ++ return entries ++ ++def main(input_str): ++ entries = parse_input(input_str) ++ evaluator = ServiceHistoryEvaluator(entries) ++ result = evaluator.evaluate() ++ print(result) ++ ++if __name__ == "__main__": ++ if len(sys.argv) != 2: ++ print("Usage: python main.py 'input1 input2 etc'") ++ sys.exit(1) ++ input_str = sys.argv[1].replace("'", "") ++ main(input_str) +``` diff --git a/tests/core/improve_function_test_cases/create_two_new_files_code b/tests/core/improve_function_test_cases/create_two_new_files_code new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/core/chat_to_files_test_cases/simple_calculator_diff b/tests/core/improve_function_test_cases/simple_calculator_chat similarity index 100% rename from tests/core/chat_to_files_test_cases/simple_calculator_diff rename to tests/core/improve_function_test_cases/simple_calculator_chat diff --git a/tests/core/chat_to_files_test_cases/simple_calculator_code b/tests/core/improve_function_test_cases/simple_calculator_code similarity index 100% rename from tests/core/chat_to_files_test_cases/simple_calculator_code rename to tests/core/improve_function_test_cases/simple_calculator_code diff --git a/tests/core/chat_to_files_test_cases/task_master_diff b/tests/core/improve_function_test_cases/task_master_chat similarity index 100% rename from tests/core/chat_to_files_test_cases/task_master_diff rename to tests/core/improve_function_test_cases/task_master_chat diff --git a/tests/core/chat_to_files_test_cases/task_master_code b/tests/core/improve_function_test_cases/task_master_code similarity index 100% rename from tests/core/chat_to_files_test_cases/task_master_code rename to tests/core/improve_function_test_cases/task_master_code diff --git a/tests/core/chat_to_files_test_cases/temperature_converter_diff b/tests/core/improve_function_test_cases/temperature_converter_chat similarity index 100% rename from tests/core/chat_to_files_test_cases/temperature_converter_diff rename to tests/core/improve_function_test_cases/temperature_converter_chat diff --git a/tests/core/chat_to_files_test_cases/temperature_converter_code b/tests/core/improve_function_test_cases/temperature_converter_code similarity index 100% rename from tests/core/chat_to_files_test_cases/temperature_converter_code rename to tests/core/improve_function_test_cases/temperature_converter_code diff --git a/tests/core/chat_to_files_test_cases/vgvishesh_example_2_diff b/tests/core/improve_function_test_cases/vgvishesh_example_2_chat similarity index 100% rename from tests/core/chat_to_files_test_cases/vgvishesh_example_2_diff rename to tests/core/improve_function_test_cases/vgvishesh_example_2_chat diff --git a/tests/core/chat_to_files_test_cases/vgvishesh_example_2_code b/tests/core/improve_function_test_cases/vgvishesh_example_2_code similarity index 100% rename from tests/core/chat_to_files_test_cases/vgvishesh_example_2_code rename to tests/core/improve_function_test_cases/vgvishesh_example_2_code diff --git a/tests/core/chat_to_files_test_cases/vgvishesh_example_diff b/tests/core/improve_function_test_cases/vgvishesh_example_chat similarity index 100% rename from tests/core/chat_to_files_test_cases/vgvishesh_example_diff rename to tests/core/improve_function_test_cases/vgvishesh_example_chat diff --git a/tests/core/chat_to_files_test_cases/vgvishesh_example_code b/tests/core/improve_function_test_cases/vgvishesh_example_code similarity index 100% rename from tests/core/chat_to_files_test_cases/vgvishesh_example_code rename to tests/core/improve_function_test_cases/vgvishesh_example_code diff --git a/tests/core/chat_to_files_test_cases/wheaties_example_diff b/tests/core/improve_function_test_cases/wheaties_example_chat similarity index 100% rename from tests/core/chat_to_files_test_cases/wheaties_example_diff rename to tests/core/improve_function_test_cases/wheaties_example_chat diff --git a/tests/core/chat_to_files_test_cases/wheaties_example_code b/tests/core/improve_function_test_cases/wheaties_example_code similarity index 100% rename from tests/core/chat_to_files_test_cases/wheaties_example_code rename to tests/core/improve_function_test_cases/wheaties_example_code diff --git a/tests/core/test_chat_to_files.py b/tests/core/test_chat_to_files.py index 30c2ca4a37..2762219e33 100644 --- a/tests/core/test_chat_to_files.py +++ b/tests/core/test_chat_to_files.py @@ -4,9 +4,9 @@ import pytest -from gpt_engineer.core.chat_to_files import apply_diffs, parse_diffs +from gpt_engineer.core.chat_to_files import parse_diffs from gpt_engineer.core.diff import is_similar -from gpt_engineer.core.files_dict import FilesDict, file_to_lines_dict +from gpt_engineer.core.files_dict import file_to_lines_dict THIS_FILE_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -224,7 +224,7 @@ def test_correct_skipped_lines(): with open( os.path.join( THIS_FILE_DIR, - "chat_to_files_test_cases", + "improve_function_test_cases", "corrected_diff_from_missing_lines", ), "r", @@ -248,7 +248,7 @@ def test_correct_skipped_lines_and_number_correction(): with open( os.path.join( THIS_FILE_DIR, - "chat_to_files_test_cases", + "improve_function_test_cases", "corrected_diff_from_missing_lines", ), "r", @@ -268,112 +268,46 @@ def test_diff_regex(): assert len(diffs) == 2 -# test parse diff -def test_controller_diff(): - load_and_test_diff("controller_diff", "controller_code") - - -def test_simple_calculator_diff(): - load_and_test_diff("simple_calculator_diff", "simple_calculator_code") - - -def test_complex_temperature_converter_diff(): - load_and_test_diff("temperature_converter_diff", "temperature_converter_code") - - -def test_complex_task_master_diff(): - load_and_test_diff("task_master_diff", "task_master_code") - - -def test_long_file_diff(): - load_and_test_diff("wheaties_example_diff", "wheaties_example_code") - - -def load_and_test_diff( +def parse_chats_with_regex( diff_file_name: str, code_file_name: str ) -> Tuple[str, str, Dict]: # Load the diff with open( - os.path.join(THIS_FILE_DIR, "chat_to_files_test_cases", diff_file_name), "r" + os.path.join(THIS_FILE_DIR, "improve_function_test_cases", diff_file_name), "r" ) as f: diff_content = f.read() # Load the corresponding code with open( - os.path.join(THIS_FILE_DIR, "chat_to_files_test_cases", code_file_name), "r" + os.path.join(THIS_FILE_DIR, "improve_function_test_cases", code_file_name), "r" ) as f: code_content = f.read() - # Parse the diffs and validate & correct them + # Parse the diffs diffs = parse_diffs(diff_content) - list(diffs.values())[0].validate_and_correct(file_to_lines_dict(code_content)) - return diff_content, code_content, diffs + return diff_content, code_content, diffs -# Test diff application -def test_validation_and_apply_complex_diff(): - task_master_diff, task_master_code, diffs = load_and_test_diff( - "task_master_diff", "task_master_code" - ) - files = FilesDict({"taskmaster.py": task_master_code}) - for file_name, diff in diffs.items(): - # if diff is a new file, validation and correction is unnecessary - if not diff.is_new_file(): - problems = diff.validate_and_correct( - file_to_lines_dict(files["taskmaster.py"]) - ) - print(problems) - - apply_diffs(diffs, files) +# test parse diff +def test_controller_diff(): + parse_chats_with_regex("controller_chat", "controller_code") -def test_validation_and_apply_long_diff(): - wheaties_diff, wheaties_code, diffs = load_and_test_diff( - "wheaties_example_diff", "wheaties_example_code" - ) - files = FilesDict({"VMClonetest.ps1": wheaties_code}) - for file_name, diff in diffs.items(): - # if diff is a new file, validation and correction is unnecessary - if not diff.is_new_file(): - problems = diff.validate_and_correct( - file_to_lines_dict(files["VMClonetest.ps1"]) - ) - print(problems) +def test_simple_calculator_diff(): + parse_chats_with_regex("simple_calculator_chat", "simple_calculator_code") - apply_diffs(diffs, files) +def test_complex_temperature_converter_diff(): + parse_chats_with_regex("temperature_converter_chat", "temperature_converter_code") -def test_validation_and_apply_wrong_diff(): - example_diff, example_code, diffs = load_and_test_diff( - "vgvishesh_example_diff", "vgvishesh_example_code" - ) - files = FilesDict({"src/components/SocialLinks.tsx": example_code}) - for file_name, diff in diffs.items(): - # if diff is a new file, validation and correction is unnecessary - if not diff.is_new_file(): - problems = diff.validate_and_correct( - file_to_lines_dict(files["src/components/SocialLinks.tsx"]) - ) - print(problems) - apply_diffs(diffs, files) +def test_complex_task_master_diff(): + parse_chats_with_regex("task_master_chat", "task_master_code") -def test_validation_and_apply_non_change_diff(): - example_diff, example_code, diffs = load_and_test_diff( - "vgvishesh_example_2_diff", "vgvishesh_example_2_code" - ) - files = FilesDict({"src/App.tsx": example_code}) - for file_name, diff in diffs.items(): - # if diff is a new file, validation and correction is unnecessary - if not diff.is_new_file(): - problems = diff.validate_and_correct( - file_to_lines_dict(files["src/App.tsx"]) - ) - print(problems) - - apply_diffs(diffs, files) +def test_long_file_diff(): + parse_chats_with_regex("wheaties_example_chat", "wheaties_example_code") if __name__ == "__main__": diff --git a/tests/core/test_salvage_correct_hunks.py b/tests/core/test_salvage_correct_hunks.py new file mode 100644 index 0000000000..86f60f0772 --- /dev/null +++ b/tests/core/test_salvage_correct_hunks.py @@ -0,0 +1,104 @@ +import os +import shutil + +from typing import List + +import pytest + +from langchain_core.messages import AIMessage + +from gpt_engineer.core.default.disk_memory import DiskMemory +from gpt_engineer.core.default.paths import memory_path +from gpt_engineer.core.default.steps import salvage_correct_hunks +from gpt_engineer.core.files_dict import FilesDict + +TEST_FILES_DIR = os.path.dirname(os.path.abspath(__file__)) +memory = DiskMemory(memory_path(".")) + + +def get_file_content(file_path: str) -> str: + with open( + os.path.join(TEST_FILES_DIR, "improve_function_test_cases", file_path), "r" + ) as f: + return f.read() + + +def message_builder(chat_path: str) -> List[AIMessage]: + chat_content = get_file_content(chat_path) + + json = { + "lc": 1, + "type": "constructor", + "id": ["langchain", "schema", "messages", "AIMessage"], + "kwargs": { + "content": chat_content, + "additional_kwargs": {}, + "response_metadata": {"finish_reason": "stop"}, + "name": None, + "id": None, + "example": False, + }, + } + + return [AIMessage(**json["kwargs"])] + + +def test_validation_and_apply_complex_diff(): + files = FilesDict({"taskmaster.py": get_file_content("task_master_code")}) + salvage_correct_hunks(message_builder("task_master_chat"), files, [], memory) + + +def test_validation_and_apply_long_diff(): + files = FilesDict({"VMClonetest.ps1": get_file_content("wheaties_example_code")}) + salvage_correct_hunks(message_builder("wheaties_example_chat"), files, [], memory) + + +def test_validation_and_apply_wrong_diff(): + files = FilesDict( + {"src/components/SocialLinks.tsx": get_file_content("vgvishesh_example_code")} + ) + salvage_correct_hunks(message_builder("vgvishesh_example_chat"), files, [], memory) + + +def test_validation_and_apply_non_change_diff(): + files = FilesDict({"src/App.tsx": get_file_content("vgvishesh_example_2_code")}) + salvage_correct_hunks( + message_builder("vgvishesh_example_2_chat"), files, [], memory + ) + + +def test_validation_and_apply_diff_on_apps_benchmark_6(): + files = FilesDict({"main.py": get_file_content("apps_benchmark_6_code")}) + salvage_correct_hunks(message_builder("apps_benchmark_6_chat"), files, [], memory) + + +def test_validation_and_apply_diff_on_apps_benchmark_6_v2(): + files = FilesDict({"main.py": get_file_content("apps_benchmark_6_v2_code")}) + salvage_correct_hunks( + message_builder("apps_benchmark_6_v2_chat"), files, [], memory + ) + + +def test_create_two_new_files(): + files = FilesDict({"main.py": get_file_content("create_two_new_files_code")}) + salvage_correct_hunks( + message_builder("create_two_new_files_chat"), files, [], memory + ) + + +def test_clean_up_folder(clean_up_folder): + # The folder should be deleted after the test is run + assert True + + +@pytest.fixture +def clean_up_folder(): + yield + # Teardown code: delete a folder and all its contents + print("cleaning up") + folder_path = os.path.join(os.path.dirname(__file__), ".gpteng") + shutil.rmtree(folder_path, ignore_errors=True) + + +if __name__ == "__main__": + pytest.main()