Skip to content

Commit 3627ca4

Browse files
BeibinLi and sonichi authored
Extract_code can detect single-line code now (#2)
* Extract_code can detect single-line code now * Add comments for RE * Add test case and adjust UNKNOWN behavior * Remove tmp test files * Update autogen/code_utils.py --------- Co-authored-by: Chi Wang <[email protected]>
1 parent 8a43c3b commit 3627ca4

File tree

2 files changed

+52
-11
lines changed

2 files changed

+52
-11
lines changed

autogen/code_utils.py

+29-10
Original file line numberDiff line numberDiff line change
@@ -34,24 +34,43 @@ def infer_lang(code):
3434
return "python"
3535

3636

37-
def extract_code(text: str, pattern: str = CODE_BLOCK_PATTERN) -> List[Tuple[str, str]]:
37+
def extract_code(
38+
text: str, pattern: str = CODE_BLOCK_PATTERN, detect_single_line_code: bool = False
39+
) -> List[Tuple[str, str]]:
3840
"""Extract code from a text.
3941
4042
Args:
4143
text (str): The text to extract code from.
42-
pattern (Optional, str): The regular expression pattern for finding the code block.
44+
pattern (str, optional): The regular expression pattern for finding the
45+
code block. Defaults to CODE_BLOCK_PATTERN.
46+
detect_single_line_code (bool, optional): Enable the new feature for
47+
extracting single line code. Defaults to False.
4348
4449
Returns:
4550
list: A list of tuples, each containing the language and the code.
51+
If there is no code block in the input text, the language would be "unknown".
52+
If there is code block but the language is not specified, the language would be "".
4653
"""
47-
# Use a regular expression to find all the code blocks
48-
match = re.findall(pattern, text, flags=re.DOTALL)
49-
# match = re.search(pattern, text, flags=re.DOTALL)
50-
# If a match is found, return the code
51-
# if match:
52-
# return match.group(2), match.group(1)
53-
# If no code block is found, return the whole text
54-
return match if match else [(UNKNOWN, text)]
54+
if not detect_single_line_code:
55+
match = re.findall(pattern, text, flags=re.DOTALL)
56+
return match if match else [(UNKNOWN, text)]
57+
58+
# Extract both multi-line and single-line code block, separated by the | operator
59+
# `{3}(\w+)?\s*([\s\S]*?)`{3}: Matches multi-line code blocks.
60+
# The (\w+)? matches the language, where the ? indicates it is optional.
61+
# `([^`]+)`: Matches inline code.
62+
code_pattern = re.compile(r"`{3}(\w+)?\s*([\s\S]*?)`{3}|`([^`]+)`")
63+
code_blocks = code_pattern.findall(text)
64+
65+
# Extract the individual code blocks and languages from the matched groups
66+
extracted = []
67+
for lang, group1, group2 in code_blocks:
68+
if group1:
69+
extracted.append((lang.strip(), group1.strip()))
70+
elif group2:
71+
extracted.append(("", group2.strip()))
72+
73+
return extracted
5574

5675

5776
# _FIND_CODE_SYS_MSG = [

test/test_code.py

+23-1
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,23 @@ def test_extract_code():
161161
```
162162
print("hello extract code")
163163
```
164-
"""
164+
""",
165+
detect_single_line_code=False,
165166
)
166167
print(codeblocks)
167168

169+
codeblocks2 = extract_code(
170+
"""
171+
Example:
172+
```
173+
print("hello extract code")
174+
```
175+
""",
176+
detect_single_line_code=True,
177+
)
178+
assert codeblocks2 == codeblocks
179+
# import pdb; pdb.set_trace()
180+
168181
codeblocks = extract_code(
169182
"""
170183
Example:
@@ -190,6 +203,15 @@ def scrape(url):
190203
codeblocks = extract_code("no code block")
191204
assert len(codeblocks) == 1 and codeblocks[0] == (UNKNOWN, "no code block")
192205

206+
# Disable single line code detection
207+
line = "Run `source setup.sh` from terminal"
208+
codeblocks = extract_code(line, detect_single_line_code=False)
209+
assert len(codeblocks) == 1 and codeblocks[0] == (UNKNOWN, line)
210+
211+
# Enable single line code detection
212+
codeblocks = extract_code("Run `source setup.sh` from terminal", detect_single_line_code=True)
213+
assert len(codeblocks) == 1 and codeblocks[0] == ("", "source setup.sh")
214+
193215

194216
@pytest.mark.skipif(
195217
sys.platform in ["darwin", "win32"],

0 commit comments

Comments
 (0)