From 61bdfd3b99f72e166c6de4da36cc75756581e98d Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sat, 10 Aug 2024 21:55:51 +0300 Subject: [PATCH 01/10] patch_extra_lines_before and patch_extra_lines_after --- .../usage-guide/additional_configurations.md | 3 ++- pr_agent/algo/git_patch_processing.py | 27 ++++++------------- pr_agent/algo/pr_processing.py | 25 +++++++---------- pr_agent/settings/configuration.toml | 3 ++- 4 files changed, 22 insertions(+), 36 deletions(-) diff --git a/docs/docs/usage-guide/additional_configurations.md b/docs/docs/usage-guide/additional_configurations.md index 4ae014148..8d8e75dcc 100644 --- a/docs/docs/usage-guide/additional_configurations.md +++ b/docs/docs/usage-guide/additional_configurations.md @@ -66,7 +66,8 @@ By default, around any change in your PR, git patch provides three lines of cont For the `review`, `describe`, `ask` and `add_docs` tools, if the token budget allows, PR-Agent tries to increase the number of lines of context, via the parameter: ``` [config] -patch_extra_lines=3 +patch_extra_lines_before=6 +patch_extra_lines_after=2 ``` Increasing this number provides more context to the model, but will also increase the token budget. diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 15343c97a..5cb18b3a0 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -7,19 +7,8 @@ from pr_agent.log import get_logger -def extend_patch(original_file_str, patch_str, num_lines) -> str: - """ - Extends the given patch to include a specified number of surrounding lines. - - Args: - original_file_str (str): The original file to which the patch will be applied. - patch_str (str): The patch to be applied to the original file. - num_lines (int): The number of surrounding lines to include in the extended patch. - - Returns: - str: The extended patch string. 
- """ - if not patch_str or num_lines == 0: +def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch_extra_lines_after=0) -> str: + if not patch_str or (patch_extra_lines_before == 0 and patch_extra_lines_after == 0): return patch_str if type(original_file_str) == bytes: @@ -43,7 +32,7 @@ def extend_patch(original_file_str, patch_str, num_lines) -> str: # finish previous hunk if start1 != -1: extended_patch_lines.extend( - original_lines[start1 + size1 - 1:start1 + size1 - 1 + num_lines]) + original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]) res = list(match.groups()) for i in range(len(res)): @@ -55,10 +44,10 @@ def extend_patch(original_file_str, patch_str, num_lines) -> str: start1, size1, size2 = map(int, res[:3]) start2 = 0 section_header = res[4] - extended_start1 = max(1, start1 - num_lines) - extended_size1 = size1 + (start1 - extended_start1) + num_lines - extended_start2 = max(1, start2 - num_lines) - extended_size2 = size2 + (start2 - extended_start2) + num_lines + extended_start1 = max(1, start1 - patch_extra_lines_before) + extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after + extended_start2 = max(1, start2 - patch_extra_lines_before) + extended_size2 = size2 + (start2 - extended_start2) + patch_extra_lines_after extended_patch_lines.append( f'@@ -{extended_start1},{extended_size1} ' f'+{extended_start2},{extended_size2} @@ {section_header}') @@ -74,7 +63,7 @@ def extend_patch(original_file_str, patch_str, num_lines) -> str: # finish previous hunk if start1 != -1: extended_patch_lines.extend( - original_lines[start1 + size1 - 1:start1 + size1 - 1 + num_lines]) + original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]) extended_patch_str = '\n'.join(extended_patch_lines) return extended_patch_str diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index d635ec35f..80a8ded7e 100644 --- a/pr_agent/algo/pr_processing.py +++ 
b/pr_agent/algo/pr_processing.py @@ -33,9 +33,11 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, large_pr_handling=False, return_remaining_files=False): if disable_extra_lines: - PATCH_EXTRA_LINES = 0 + PATCH_EXTRA_LINES_BEFORE = 0 + PATCH_EXTRA_LINES_AFTER = 0 else: - PATCH_EXTRA_LINES = get_settings().config.patch_extra_lines + PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before + PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after try: diff_files_original = git_provider.get_diff_files() @@ -64,7 +66,8 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, # generate a standard diff string, with patch extension patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff( - pr_languages, token_handler, add_line_numbers_to_hunks, patch_extra_lines=PATCH_EXTRA_LINES) + pr_languages, token_handler, add_line_numbers_to_hunks, + patch_extra_lines_before=PATCH_EXTRA_LINES_BEFORE, patch_extra_lines_after=PATCH_EXTRA_LINES_AFTER) # if we are under the limit, return the full diff if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model): @@ -174,17 +177,8 @@ def get_pr_diff_multiple_patchs(git_provider: GitProvider, token_handler: TokenH def pr_generate_extended_diff(pr_languages: list, token_handler: TokenHandler, add_line_numbers_to_hunks: bool, - patch_extra_lines: int = 0) -> Tuple[list, int, list]: - """ - Generate a standard diff string with patch extension, while counting the number of tokens used and applying diff - minimization techniques if needed. - - Args: - - pr_languages: A list of dictionaries representing the languages used in the pull request and their corresponding - files. - - token_handler: An object of the TokenHandler class used for handling tokens in the context of the pull request. - - add_line_numbers_to_hunks: A boolean indicating whether to add line numbers to the hunks in the diff. 
- """ + patch_extra_lines_before: int = 0, + patch_extra_lines_after: int = 0) -> Tuple[list, int, list]: total_tokens = token_handler.prompt_tokens # initial tokens patches_extended = [] patches_extended_tokens = [] @@ -196,7 +190,8 @@ def pr_generate_extended_diff(pr_languages: list, continue # extend each patch with extra lines of context - extended_patch = extend_patch(original_file_content_str, patch, num_lines=patch_extra_lines) + extended_patch = extend_patch(original_file_content_str, patch, + patch_extra_lines_before, patch_extra_lines_after) if not extended_patch: get_logger().warning(f"Failed to extend patch for file: {file.filename}") continue diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 5336a48a9..7598462d3 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -20,7 +20,8 @@ max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. custom_model_max_tokens=-1 # for models not in the default list # -patch_extra_lines = 1 +patch_extra_lines_before = 6 +patch_extra_lines_after = 2 secret_provider="" cli_mode=false ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI outputs From e238a888247361abdf83b6f6a31a298b5fc4fcc7 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 11 Aug 2024 09:21:34 +0300 Subject: [PATCH 02/10] Add tests for patch extension and update configuration for extra lines handling - Added unit tests in `test_extend_patch.py` and `test_pr_generate_extended_diff.py` to verify patch extension functionality with extra lines. - Updated `pr_processing.py` to include `patch_extra_lines_before` and `patch_extra_lines_after` settings. - Modified `configuration.toml` to adjust `patch_extra_lines_before` to 4 and `max_context_tokens` to 16000. - Enabled extra lines in `pr_code_suggestions.py`. 
- Added new model `claude-3-5-sonnet` to `__init__.py`. --- pr_agent/algo/__init__.py | 1 + pr_agent/algo/pr_processing.py | 7 +- pr_agent/settings/configuration.toml | 4 +- pr_agent/tools/pr_code_suggestions.py | 2 +- tests/unittest/test_extend_patch.py | 110 +++++++++++++++----------- 5 files changed, 74 insertions(+), 50 deletions(-) diff --git a/pr_agent/algo/__init__.py b/pr_agent/algo/__init__.py index f51c4415d..f7aa6b60e 100644 --- a/pr_agent/algo/__init__.py +++ b/pr_agent/algo/__init__.py @@ -46,6 +46,7 @@ 'bedrock/anthropic.claude-3-sonnet-20240229-v1:0': 100000, 'bedrock/anthropic.claude-3-haiku-20240307-v1:0': 100000, 'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0': 100000, + 'claude-3-5-sonnet': 100000, 'groq/llama3-8b-8192': 8192, 'groq/llama3-70b-8192': 8192, 'groq/mixtral-8x7b-32768': 32768, diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 80a8ded7e..1cad30b6e 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -400,10 +400,13 @@ def get_pr_multi_diffs(git_provider: GitProvider, for lang in pr_languages: sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True)) - # try first a single run with standard diff string, with patch extension, and no deletions patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff( - pr_languages, token_handler, add_line_numbers_to_hunks=True) + pr_languages, token_handler, add_line_numbers_to_hunks=True, + patch_extra_lines_before=get_settings().config.patch_extra_lines_before, + patch_extra_lines_after=get_settings().config.patch_extra_lines_after) + + # if we are under the limit, return the full diff if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model): return ["\n".join(patches_extended)] if patches_extended else [] diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 7598462d3..6264631a9 100644 --- 
a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -20,7 +20,7 @@ max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. custom_model_max_tokens=-1 # for models not in the default list # -patch_extra_lines_before = 6 +patch_extra_lines_before = 4 patch_extra_lines_after = 2 secret_provider="" cli_mode=false @@ -97,7 +97,7 @@ enable_help_text=false [pr_code_suggestions] # /improve # -max_context_tokens=10000 +max_context_tokens=16000 num_code_suggestions=4 commitable_code_suggestions = false extra_instructions = "" diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index f98590ce3..1a965192a 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -286,7 +286,7 @@ async def _prepare_prediction(self, model: str) -> dict: self.token_handler, model, add_line_numbers_to_hunks=True, - disable_extra_lines=True) + disable_extra_lines=False) if self.patches_diff: get_logger().debug(f"PR diff", artifact=self.patches_diff) diff --git a/tests/unittest/test_extend_patch.py b/tests/unittest/test_extend_patch.py index ba0af881b..cb2b3c9cd 100644 --- a/tests/unittest/test_extend_patch.py +++ b/tests/unittest/test_extend_patch.py @@ -1,44 +1,6 @@ - -# Generated by CodiumAI - - +import pytest from pr_agent.algo.git_patch_processing import extend_patch - -""" -Code Analysis - -Objective: -The objective of the 'extend_patch' function is to extend a given patch to include a specified number of surrounding -lines. This function takes in an original file string, a patch string, and the number of lines to extend the patch by, -and returns the extended patch string. 
- -Inputs: -- original_file_str: a string representing the original file -- patch_str: a string representing the patch to be extended -- num_lines: an integer representing the number of lines to extend the patch by - -Flow: -1. Split the original file string and patch string into separate lines -2. Initialize variables to keep track of the current hunk's start and size for both the original file and the patch -3. Iterate through each line in the patch string -4. If the line starts with '@@', extract the start and size values for both the original file and the patch, and -calculate the extended start and size values -5. Append the extended hunk header to the extended patch lines list -6. Append the specified number of lines before the hunk to the extended patch lines list -7. Append the current line to the extended patch lines list -8. If the line is not a hunk header, append it to the extended patch lines list -9. Return the extended patch string - -Outputs: -- extended_patch_str: a string representing the extended patch - -Additional aspects: -- The function uses regular expressions to extract the start and size values from the hunk header -- The function handles cases where the start value of a hunk is less than the number of lines to extend by by setting -the extended start value to 1 -- The function handles cases where the hunk extends beyond the end of the original file by only including lines up to -the end of the original file in the extended patch -""" +from pr_agent.algo.token_handler import TokenHandler class TestExtendPatch: @@ -48,7 +10,8 @@ def test_happy_path(self): patch_str = '@@ -2,2 +2,2 @@ init()\n-line2\n+new_line2\nline3' num_lines = 1 expected_output = '@@ -1,4 +1,4 @@ init()\nline1\n-line2\n+new_line2\nline3\nline4' - actual_output = extend_patch(original_file_str, patch_str, num_lines) + actual_output = extend_patch(original_file_str, patch_str, + patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) assert actual_output == 
expected_output # Tests that the function returns an empty string when patch_str is empty @@ -57,14 +20,16 @@ def test_empty_patch(self): patch_str = '' num_lines = 1 expected_output = '' - assert extend_patch(original_file_str, patch_str, num_lines) == expected_output + assert extend_patch(original_file_str, patch_str, + patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) == expected_output # Tests that the function returns the original patch when num_lines is 0 def test_zero_num_lines(self): original_file_str = 'line1\nline2\nline3\nline4\nline5' patch_str = '@@ -2,2 +2,2 @@ init()\n-line2\n+new_line2\nline3' num_lines = 0 - assert extend_patch(original_file_str, patch_str, num_lines) == patch_str + assert extend_patch(original_file_str, patch_str, + patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) == patch_str # Tests that the function returns the original patch when patch_str contains no hunks def test_no_hunks(self): @@ -80,7 +45,8 @@ def test_single_hunk(self): patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\nline3\nline4' num_lines = 1 expected_output = '@@ -1,5 +1,5 @@ init()\nline1\n-line2\n+new_line2\nline3\nline4\nline5' - actual_output = extend_patch(original_file_str, patch_str, num_lines) + actual_output = extend_patch(original_file_str, patch_str, + patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) assert actual_output == expected_output # Tests the functionality of extending a patch with multiple hunks. 
@@ -89,5 +55,59 @@ def test_multiple_hunks(self): patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\nline3\nline4\n@@ -4,1 +4,1 @@ init2()\n-line4\n+new_line4' # noqa: E501 num_lines = 1 expected_output = '@@ -1,5 +1,5 @@ init()\nline1\n-line2\n+new_line2\nline3\nline4\nline5\n@@ -3,3 +3,3 @@ init2()\nline3\n-line4\n+new_line4\nline5' # noqa: E501 - actual_output = extend_patch(original_file_str, patch_str, num_lines) + actual_output = extend_patch(original_file_str, patch_str, + patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) assert actual_output == expected_output + + +class PRProcessingTest: + class File: + def __init__(self, base_file, patch, filename): + self.base_file = base_file + self.patch = patch + self.filename = filename + + @pytest.fixture + def token_handler(self): + # Create a TokenHandler instance with dummy data + th = TokenHandler(system="System prompt", user="User prompt") + th.prompt_tokens = 100 + return th + + @pytest.fixture + def pr_languages(self): + # Create a list of languages with files containing base_file and patch data + return [ + { + 'files': [ + self.File(base_file="line000\nline00\nline0\nline1\noriginal content\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10", + patch="@@ -5,5 +5,5 @@\n-original content\n+modified content\nline2\nline3\nline4\nline5", + filename="file1"), + self.File(base_file="original content\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10", + patch="@@ -6,5 +6,5 @@\nline6\nline7\nline8\n-line9\n+modified line9\nline10", + filename="file2") + ] + } + ] + + def test_extend_patches_with_extra_lines(self, token_handler, pr_languages): + patches_extended_no_extra_lines, total_tokens, patches_extended_tokens = pr_generate_extended_diff( + pr_languages, token_handler, add_line_numbers_to_hunks=False, + patch_extra_lines_before=0, + patch_extra_lines_after=0 + ) + + # Check that with no extra lines, the patches are the same as the original patches + p0 = 
patches_extended_no_extra_lines[0].strip() + p1 = patches_extended_no_extra_lines[1].strip() + assert p0 == '## file1\n\n' + pr_languages[0]['files'][0].patch.strip() + assert p1 == '## file2\n\n' + pr_languages[0]['files'][1].patch.strip() + + patches_extended_with_extra_lines, total_tokens, patches_extended_tokens = pr_generate_extended_diff( + pr_languages, token_handler, add_line_numbers_to_hunks=False, + patch_extra_lines_before=2, + patch_extra_lines_after=1 + ) + + p0_extended = patches_extended_with_extra_lines[0].strip() + assert p0_extended == '## file1\n\n@@ -3,8 +3,8 @@ \nline0\nline1\n-original content\n+modified content\nline2\nline3\nline4\nline5\nline6' From 2b2b851cb9d7f68e5de38c1bb198796b59d9334b Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 11 Aug 2024 11:29:31 +0300 Subject: [PATCH 03/10] Update test class name and adjust patch extra lines configuration - Renamed test class to `TestExtendedPatchMoreLines` in `test_extend_patch.py` - Imported `pr_generate_extended_diff` in `test_extend_patch.py` - Updated `patch_extra_lines_before` to 4 in `additional_configurations.md` --- docs/docs/usage-guide/additional_configurations.md | 2 +- tests/unittest/test_extend_patch.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/docs/usage-guide/additional_configurations.md b/docs/docs/usage-guide/additional_configurations.md index 8d8e75dcc..121d77b61 100644 --- a/docs/docs/usage-guide/additional_configurations.md +++ b/docs/docs/usage-guide/additional_configurations.md @@ -66,7 +66,7 @@ By default, around any change in your PR, git patch provides three lines of cont For the `review`, `describe`, `ask` and `add_docs` tools, if the token budget allows, PR-Agent tries to increase the number of lines of context, via the parameter: ``` [config] -patch_extra_lines_before=6 +patch_extra_lines_before=4 patch_extra_lines_after=2 ``` diff --git a/tests/unittest/test_extend_patch.py b/tests/unittest/test_extend_patch.py index 
cb2b3c9cd..7737ee8d1 100644 --- a/tests/unittest/test_extend_patch.py +++ b/tests/unittest/test_extend_patch.py @@ -1,5 +1,6 @@ import pytest from pr_agent.algo.git_patch_processing import extend_patch +from pr_agent.algo.pr_processing import pr_generate_extended_diff from pr_agent.algo.token_handler import TokenHandler @@ -60,7 +61,7 @@ def test_multiple_hunks(self): assert actual_output == expected_output -class PRProcessingTest: +class TestExtendedPatchMoreLines: class File: def __init__(self, base_file, patch, filename): self.base_file = base_file From 7438190ed100f5ea0911b8ad06371489837b1d3c Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 11 Aug 2024 11:43:57 +0300 Subject: [PATCH 04/10] set_claude_model --- pr_agent/git_providers/utils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pr_agent/git_providers/utils.py b/pr_agent/git_providers/utils.py index a0d65b668..8a9579cff 100644 --- a/pr_agent/git_providers/utils.py +++ b/pr_agent/git_providers/utils.py @@ -47,3 +47,17 @@ def apply_repo_settings(pr_url): os.remove(repo_settings_file) except Exception as e: get_logger().error(f"Failed to remove temporary settings file {repo_settings_file}", e) + + # enable switching models with a short definition + if get_settings().config.model.lower()=='claude-3-5-sonnet': + set_claude_model() + + +def set_claude_model(): + """ + set the claude-sonnet-3.5 model easily (even by users), just by stating: --config.model='claude-3-5-sonnet' + """ + model_claude = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0" + get_settings().set('config.model', model_claude) + get_settings().set('config.model_turbo', model_claude) + get_settings().set('config.fallback_models', [model_claude]) From 983233c193306b3605c387a54301b51ecda67534 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 11 Aug 2024 11:48:50 +0300 Subject: [PATCH 05/10] Clarify comments for patch_extra_lines_before and patch_extra_lines_after in configuration.toml --- pr_agent/settings/configuration.toml 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 6264631a9..6113c3f69 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -20,8 +20,8 @@ max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. custom_model_max_tokens=-1 # for models not in the default list # -patch_extra_lines_before = 4 -patch_extra_lines_after = 2 +patch_extra_lines_before = 4 # Number of extra lines (+3 default ones) to include before each hunk in the patch +patch_extra_lines_after = 2 # Number of extra lines (+3 default ones) to include after each hunk in the patch secret_provider="" cli_mode=false ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI outputs From ed65493718d2e307d58f332da9e0543bd4e1bda2 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 11 Aug 2024 12:08:00 +0300 Subject: [PATCH 06/10] Handle edge cases for patch extension and update tests --- pr_agent/algo/git_patch_processing.py | 13 +++++++++---- tests/unittest/test_extend_patch.py | 11 ++++++----- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 5cb18b3a0..ba98e54d5 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -18,6 +18,7 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch return "" original_lines = original_file_str.splitlines() + len_original_lines = len(original_lines) patch_lines = patch_str.splitlines() extended_patch_lines = [] @@ -29,8 +30,8 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch if line.startswith('@@'): match = RE_HUNK_HEADER.match(line) if match: - # finish previous hunk - if start1 != -1: + # finish last hunk + if 
start1 != -1 and patch_extra_lines_after > 0: extended_patch_lines.extend( original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]) @@ -46,8 +47,12 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch section_header = res[4] extended_start1 = max(1, start1 - patch_extra_lines_before) extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after + if extended_start1 - 1 + extended_size1 > len(original_lines): + extended_size1 = len_original_lines - extended_start1 + 1 extended_start2 = max(1, start2 - patch_extra_lines_before) extended_size2 = size2 + (start2 - extended_start2) + patch_extra_lines_after + if extended_start2 - 1 + extended_size2 > len_original_lines: + extended_size2 = len_original_lines - extended_start2 + 1 extended_patch_lines.append( f'@@ -{extended_start1},{extended_size1} ' f'+{extended_start2},{extended_size2} @@ {section_header}') @@ -60,8 +65,8 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch get_logger().error(f"Failed to extend patch: {e}") return patch_str - # finish previous hunk - if start1 != -1: + # finish last hunk + if start1 != -1 and patch_extra_lines_after > 0: extended_patch_lines.extend( original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]) diff --git a/tests/unittest/test_extend_patch.py b/tests/unittest/test_extend_patch.py index 7737ee8d1..f44d74179 100644 --- a/tests/unittest/test_extend_patch.py +++ b/tests/unittest/test_extend_patch.py @@ -44,11 +44,12 @@ def test_no_hunks(self): def test_single_hunk(self): original_file_str = 'line1\nline2\nline3\nline4\nline5' patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\nline3\nline4' - num_lines = 1 - expected_output = '@@ -1,5 +1,5 @@ init()\nline1\n-line2\n+new_line2\nline3\nline4\nline5' - actual_output = extend_patch(original_file_str, patch_str, - patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) - assert 
actual_output == expected_output + + for num_lines in [1, 2, 3]: # check that even if we are over the number of lines in the file, the function still works + expected_output = '@@ -1,5 +1,5 @@ init()\nline1\n-line2\n+new_line2\nline3\nline4\nline5' + actual_output = extend_patch(original_file_str, patch_str, + patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) + assert actual_output == expected_output # Tests the functionality of extending a patch with multiple hunks. def test_multiple_hunks(self): From 9c3f0801125d1baf81f20e4bb7de7074f814959e Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 11 Aug 2024 12:15:47 +0300 Subject: [PATCH 07/10] comments --- pr_agent/algo/pr_processing.py | 2 +- pr_agent/settings/configuration.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 1cad30b6e..a51820d9a 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -75,7 +75,7 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, f"returning full diff.") return "\n".join(patches_extended) - # if we are over the limit, start pruning + # if we are over the limit, start pruning (If we got here, we will not extend the patches with extra lines) get_logger().info(f"Tokens: {total_tokens}, total tokens over limit: {get_max_tokens(model)}, " f"pruning diff.") patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list = \ diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 6113c3f69..0b1697b0e 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -97,7 +97,7 @@ enable_help_text=false [pr_code_suggestions] # /improve # -max_context_tokens=16000 +max_context_tokens=14000 num_code_suggestions=4 commitable_code_suggestions = false extra_instructions = "" From 
df04a7e0469a4ac37fd480f46b851a04d9235600 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 11 Aug 2024 12:32:26 +0300 Subject: [PATCH 08/10] Add spaces to extra lines in patch extension for consistency --- pr_agent/algo/git_patch_processing.py | 16 ++++++++++------ tests/unittest/test_extend_patch.py | 16 ++++++++-------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index ba98e54d5..44725f0ee 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -32,8 +32,9 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch if match: # finish last hunk if start1 != -1 and patch_extra_lines_after > 0: - extended_patch_lines.extend( - original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]) + delta_lines = original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after] + delta_lines = [f' {line}' for line in delta_lines] + extended_patch_lines.extend(delta_lines) res = list(match.groups()) for i in range(len(res)): @@ -56,8 +57,9 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch extended_patch_lines.append( f'@@ -{extended_start1},{extended_size1} ' f'+{extended_start2},{extended_size2} @@ {section_header}') - extended_patch_lines.extend( - original_lines[extended_start1 - 1:start1 - 1]) # one to zero based + delta_lines = original_lines[extended_start1 - 1:start1 - 1] + delta_lines = [f' {line}' for line in delta_lines] + extended_patch_lines.extend(delta_lines) # one to zero based continue extended_patch_lines.append(line) except Exception as e: @@ -67,8 +69,10 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch # finish last hunk if start1 != -1 and patch_extra_lines_after > 0: - extended_patch_lines.extend( - original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]) + delta_lines = 
original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after] + # add space at the beginning of each extra line + delta_lines = [f' {line}' for line in delta_lines] + extended_patch_lines.extend(delta_lines) extended_patch_str = '\n'.join(extended_patch_lines) return extended_patch_str diff --git a/tests/unittest/test_extend_patch.py b/tests/unittest/test_extend_patch.py index f44d74179..9d309822f 100644 --- a/tests/unittest/test_extend_patch.py +++ b/tests/unittest/test_extend_patch.py @@ -8,9 +8,9 @@ class TestExtendPatch: # Tests that the function works correctly with valid input def test_happy_path(self): original_file_str = 'line1\nline2\nline3\nline4\nline5' - patch_str = '@@ -2,2 +2,2 @@ init()\n-line2\n+new_line2\nline3' + patch_str = '@@ -2,2 +2,2 @@ init()\n-line2\n+new_line2\n line3' num_lines = 1 - expected_output = '@@ -1,4 +1,4 @@ init()\nline1\n-line2\n+new_line2\nline3\nline4' + expected_output = '@@ -1,4 +1,4 @@ init()\n line1\n-line2\n+new_line2\n line3\n line4' actual_output = extend_patch(original_file_str, patch_str, patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) assert actual_output == expected_output @@ -43,10 +43,10 @@ def test_no_hunks(self): # Tests that the function extends a patch with a single hunk correctly def test_single_hunk(self): original_file_str = 'line1\nline2\nline3\nline4\nline5' - patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\nline3\nline4' + patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\n line3\n line4' for num_lines in [1, 2, 3]: # check that even if we are over the number of lines in the file, the function still works - expected_output = '@@ -1,5 +1,5 @@ init()\nline1\n-line2\n+new_line2\nline3\nline4\nline5' + expected_output = '@@ -1,5 +1,5 @@ init()\n line1\n-line2\n+new_line2\n line3\n line4\n line5' actual_output = extend_patch(original_file_str, patch_str, patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) assert actual_output == 
expected_output @@ -54,9 +54,9 @@ def test_single_hunk(self): # Tests the functionality of extending a patch with multiple hunks. def test_multiple_hunks(self): original_file_str = 'line1\nline2\nline3\nline4\nline5\nline6' - patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\nline3\nline4\n@@ -4,1 +4,1 @@ init2()\n-line4\n+new_line4' # noqa: E501 + patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\n line3\n line4\n@@ -4,1 +4,1 @@ init2()\n-line4\n+new_line4' # noqa: E501 num_lines = 1 - expected_output = '@@ -1,5 +1,5 @@ init()\nline1\n-line2\n+new_line2\nline3\nline4\nline5\n@@ -3,3 +3,3 @@ init2()\nline3\n-line4\n+new_line4\nline5' # noqa: E501 + expected_output = '@@ -1,5 +1,5 @@ init()\n line1\n-line2\n+new_line2\n line3\n line4\n line5\n@@ -3,3 +3,3 @@ init2()\n line3\n-line4\n+new_line4\n line5' # noqa: E501 actual_output = extend_patch(original_file_str, patch_str, patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) assert actual_output == expected_output @@ -83,7 +83,7 @@ def pr_languages(self): { 'files': [ self.File(base_file="line000\nline00\nline0\nline1\noriginal content\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10", - patch="@@ -5,5 +5,5 @@\n-original content\n+modified content\nline2\nline3\nline4\nline5", + patch="@@ -5,5 +5,5 @@\n-original content\n+modified content\n line2\n line3\n line4\n line5", filename="file1"), self.File(base_file="original content\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10", patch="@@ -6,5 +6,5 @@\nline6\nline7\nline8\n-line9\n+modified line9\nline10", @@ -112,4 +112,4 @@ def test_extend_patches_with_extra_lines(self, token_handler, pr_languages): ) p0_extended = patches_extended_with_extra_lines[0].strip() - assert p0_extended == '## file1\n\n@@ -3,8 +3,8 @@ \nline0\nline1\n-original content\n+modified content\nline2\nline3\nline4\nline5\nline6' + assert p0_extended == '## file1\n\n@@ -3,8 +3,8 @@ \n line0\n line1\n-original content\n+modified content\n 
line2\n line3\n line4\n line5\n line6' From e85b75fe64b624c1d9519830bb9ee4f505f4bedb Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 11 Aug 2024 12:56:56 +0300 Subject: [PATCH 09/10] Refactor patch extension logic to handle cases with zero extra lines --- pr_agent/algo/git_patch_processing.py | 33 +++++++++++++++++++-------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 44725f0ee..69e06fcf1 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -46,19 +46,32 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch start1, size1, size2 = map(int, res[:3]) start2 = 0 section_header = res[4] - extended_start1 = max(1, start1 - patch_extra_lines_before) - extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after - if extended_start1 - 1 + extended_size1 > len(original_lines): - extended_size1 = len_original_lines - extended_start1 + 1 - extended_start2 = max(1, start2 - patch_extra_lines_before) - extended_size2 = size2 + (start2 - extended_start2) + patch_extra_lines_after - if extended_start2 - 1 + extended_size2 > len_original_lines: - extended_size2 = len_original_lines - extended_start2 + 1 + + if patch_extra_lines_before > 0 or patch_extra_lines_after > 0: + extended_start1 = max(1, start1 - patch_extra_lines_before) + extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after + if extended_start1 - 1 + extended_size1 > len(original_lines): + extended_size1 = len_original_lines - extended_start1 + 1 + extended_start2 = max(1, start2 - patch_extra_lines_before) + extended_size2 = size2 + (start2 - extended_start2) + patch_extra_lines_after + if extended_start2 - 1 + extended_size2 > len_original_lines: + extended_size2 = len_original_lines - extended_start2 + 1 + delta_lines = original_lines[extended_start1 - 1:start1 - 1] + delta_lines = [f' {line}' for 
line in delta_lines] + if section_header: + for line in delta_lines: + if section_header in line: + section_header = '' # remove section header if it is in the extra delta lines + break + else: + extended_start1 = start1 + extended_size1 = size1 + extended_start2 = start2 + extended_size2 = size2 + delta_lines = [] extended_patch_lines.append( f'@@ -{extended_start1},{extended_size1} ' f'+{extended_start2},{extended_size2} @@ {section_header}') - delta_lines = original_lines[extended_start1 - 1:start1 - 1] - delta_lines = [f' {line}' for line in delta_lines] extended_patch_lines.extend(delta_lines) # one to zero based continue extended_patch_lines.append(line) From 23aa2a9388c463d966510666431859117f524b1f Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 11 Aug 2024 13:59:27 +0300 Subject: [PATCH 10/10] Refactor patch extension logic to handle cases with zero extra lines --- pr_agent/settings/configuration.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 0b1697b0e..5bfc5071e 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -20,8 +20,8 @@ max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. 
custom_model_max_tokens=-1 # for models not in the default list # -patch_extra_lines_before = 4 # Number of extra lines (+3 default ones) to include before each hunk in the patch -patch_extra_lines_after = 2 # Number of extra lines (+3 default ones) to include after each hunk in the patch +patch_extra_lines_before = 3 # Number of extra context lines (in addition to the 3 default ones git provides) to include before each hunk in the patch +patch_extra_lines_after = 1 # Number of extra context lines (in addition to the 3 default ones git provides) to include after each hunk in the patch secret_provider="" cli_mode=false ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI outputs