forked from microsoft/autogen
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Text Compression Transform (microsoft#2225)
* adds implementation * handles optional import * cleanup * updates github workflows * skip test if dependencies not installed * skip test if dependencies not installed * use cpu * skip openai * unskip openai * adds protocol * better docstr * minor fixes * updates optional dependencies docs * wip * update docstrings * wip * adds back llmlingua requirement * finalized protocol * improve docstr * guide complete * improve docstr * fix FAQ * added cache support * improve cache key * cache key fix + faq fix * improve docs * improve guide * args -> params * spelling
- Loading branch information
1 parent
25bd6b2
commit 69bce13
Showing
10 changed files
with
503 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
autogen/agentchat/contrib/capabilities/text_compressors.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
from typing import Any, Dict, Optional, Protocol | ||
|
||
# llmlingua is an optional dependency. Instead of failing when this module is
# imported, the ImportError is stored here and raised later by the code that
# actually needs the library (it checks IMPORT_ERROR before using it).
IMPORT_ERROR: Optional[Exception] = None
try:
    import llmlingua
except ImportError:
    IMPORT_ERROR = ImportError(
        "LLMLingua is not installed. Please install it with `pip install pyautogen[long-context]`"
    )
    # Placeholder so the name PromptCompressor still resolves at module level
    # when the library is missing.
    PromptCompressor = object
else:
    from llmlingua import PromptCompressor
|
||
|
||
class TextCompressor(Protocol):
    """Defines a protocol for text compression to optimize agent interactions."""

    def compress_text(self, text: str, **compression_params) -> Dict[str, Any]:
        """Compress `text` and return a dictionary describing the result.

        The returned dictionary is expected to follow these conventions:
            - The compressed text should be stored under the 'compressed_text' key.
            - To calculate the number of saved tokens, the dictionary should also include the
              'origin_tokens' and 'compressed_tokens' keys.

        Args:
            text (str): The text to compress.
            **compression_params: Implementation-specific options that control the compression.

        Returns:
            Dict[str, Any]: The compressed text together with any other relevant information.
        """
        ...
|
||
|
||
class LLMLingua:
    """Compresses text messages using LLMLingua for improved efficiency in processing and response generation.

    NOTE: The effectiveness of compression and the resultant token savings can vary based on the content of the messages
    and the specific configurations used for the PromptCompressor.
    """

    def __init__(
        self,
        prompt_compressor_kwargs: Optional[Dict] = None,
        structured_compression: bool = False,
    ) -> None:
        """
        Args:
            prompt_compressor_kwargs (dict, optional): A dictionary of keyword arguments for the PromptCompressor.
                When omitted (None), defaults to a dictionary with model_name set to
                "microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank", use_llmlingua2 set to True,
                and device_map set to "cpu".
            structured_compression (bool): A flag indicating whether to use structured compression. If True, the
                structured_compress_prompt method of the PromptCompressor is used. Otherwise, the compress_prompt
                method is used. Defaults to False.

        Raises:
            ImportError: If the llmlingua library is not installed.
        """
        if IMPORT_ERROR:
            raise IMPORT_ERROR

        if prompt_compressor_kwargs is None:
            # Built on every call instead of in the signature, so no dictionary object is shared
            # between instances through the function's default arguments.
            prompt_compressor_kwargs = dict(
                model_name="microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank",
                use_llmlingua2=True,
                device_map="cpu",
            )

        self._prompt_compressor = PromptCompressor(**prompt_compressor_kwargs)

        # Type narrowing only: at module level PromptCompressor may be the `object` placeholder,
        # so this tells static checkers which class the instance really is.
        assert isinstance(self._prompt_compressor, llmlingua.PromptCompressor)

        # Pick the bound compression method once so compress_text does not branch on every call.
        self._compression_method = (
            self._prompt_compressor.structured_compress_prompt
            if structured_compression
            else self._prompt_compressor.compress_prompt
        )

    def compress_text(self, text: str, **compression_params) -> Dict[str, Any]:
        """Compress `text` with the configured PromptCompressor method.

        Args:
            text (str): The text to compress. It is passed to the compressor as a single-element list.
            **compression_params: Extra keyword arguments forwarded unchanged to the compression method.

        Returns:
            Dict[str, Any]: The dictionary produced by the compression method. If it contains a
                'compressed_prompt' entry but no 'compressed_text' entry, the same value is also exposed
                under 'compressed_text'; the original 'compressed_prompt' key is kept.
        """
        result = self._compression_method([text], **compression_params)

        # NOTE(review): LLMLingua is understood to report the compressed string under 'compressed_prompt',
        # while the TextCompressor protocol asks for 'compressed_text' - confirm against the LLMLingua docs.
        # The alias is additive, so callers that read 'compressed_prompt' keep working.
        if "compressed_text" not in result and "compressed_prompt" in result:
            result["compressed_text"] = result["compressed_prompt"]

        return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.