Fix issues identified by pre-commit hooks
fm1320 committed Nov 22, 2024
1 parent 79b0b2e commit 3196631
Showing 17 changed files with 88 additions and 72 deletions.
4 changes: 2 additions & 2 deletions adalflow/PACKAGING.md
@@ -33,10 +33,10 @@ pip install "dist/adalflow-0.1.0b1-py3-none-any.whl[openai,groq,faiss]"

1. Update the version in `pyproject.toml`
2. Add the version number in `adalflow/__init__.py`
3. Build the package
3. Build the package
4. Test the package locally
5. Push the changes to the repository
6. Ensure to run `poetry lock --no-update` in the root directory (project-level) to update the lock file for other directories such as `tutorials`, `use_cases`, `benchmarks`, etc.
7. Update the `CHANGELOG.md` file with the new version number and the changes made in the new version.

## TODO: we need to automate the version update process. Help is appreciated.
## TODO: we need to automate the version update process. Help is appreciated.
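
Pending that automation, a minimal pre-release check could assert that the two version declarations agree. This is only a sketch: it assumes the version lives under `[tool.poetry]` in `adalflow/pyproject.toml` and that `adalflow/__init__.py` exposes a `__version__` attribute.

import tomllib  # Python 3.11+; older interpreters can use the third-party "tomli" package

import adalflow


def check_version_sync(pyproject_path: str = "adalflow/pyproject.toml") -> None:
    """Fail fast if pyproject.toml and adalflow.__version__ disagree."""
    with open(pyproject_path, "rb") as f:
        declared = tomllib.load(f)["tool"]["poetry"]["version"]
    if declared != adalflow.__version__:
        raise RuntimeError(
            f"pyproject.toml declares {declared}, "
            f"but adalflow.__version__ is {adalflow.__version__}"
        )


if __name__ == "__main__":
    check_version_sync()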
@@ -167,4 +167,4 @@ async def acall(
elif model_type == ModelType.LLM:
return await self.async_client.messages.create(**api_kwargs)
else:
raise ValueError(f"model_type {model_type} is not supported")
raise ValueError(f"model_type {model_type} is not supported")
64 changes: 35 additions & 29 deletions adalflow/adalflow/components/model_client/bedrock_client.py
@@ -15,17 +15,21 @@

bedrock_runtime_exceptions = boto3.client(
service_name="bedrock-runtime",
region_name=os.getenv("AWS_REGION_NAME", "us-east-1")
region_name=os.getenv("AWS_REGION_NAME", "us-east-1"),
).exceptions


def get_first_message_content(completion: Dict) -> str:
r"""When we only need the content of the first message.
It is the default parser for chat completion."""
return completion['output']['message']['content'][0]['text']
return completion["output"]["message"]["content"][0]["text"]


__all__ = ["BedrockAPIClient", "get_first_message_content", "bedrock_runtime_exceptions"]
__all__ = [
"BedrockAPIClient",
"get_first_message_content",
"bedrock_runtime_exceptions",
]


class BedrockAPIClient(ModelClient):
@@ -34,15 +38,15 @@ class BedrockAPIClient(ModelClient):
"""

def __init__(
self,
aws_profile_name=None,
aws_region_name=None,
aws_access_key_id=None,
aws_secret_access_key=None,
aws_session_token=None,
aws_connection_timeout=None,
aws_read_timeout=None,
chat_completion_parser: Callable = None,
self,
aws_profile_name=None,
aws_region_name=None,
aws_access_key_id=None,
aws_secret_access_key=None,
aws_session_token=None,
aws_connection_timeout=None,
aws_read_timeout=None,
chat_completion_parser: Callable = None,
):
super().__init__()
self._aws_profile_name = aws_profile_name
@@ -56,7 +60,7 @@ def __init__(
self.session = None
self.sync_client = self.init_sync_client()
self.chat_completion_parser = (
chat_completion_parser or get_first_message_content
chat_completion_parser or get_first_message_content
)

def init_sync_client(self):
@@ -67,14 +71,16 @@ def init_sync_client(self):
aws_profile_name = self._aws_profile_name or os.getenv("AWS_PROFILE_NAME")
aws_region_name = self._aws_region_name or os.getenv("AWS_REGION_NAME")
aws_access_key_id = self._aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_access_key = self._aws_secret_access_key or os.getenv("AWS_SECRET_ACCESS_KEY")
aws_secret_access_key = self._aws_secret_access_key or os.getenv(
"AWS_SECRET_ACCESS_KEY"
)
aws_session_token = self._aws_session_token or os.getenv("AWS_SESSION_TOKEN")

config = None
if self._aws_connection_timeout or self._aws_read_timeout:
config = Config(
connect_timeout=self._aws_connection_timeout, # Connection timeout in seconds
read_timeout=self._aws_read_timeout # Read timeout in seconds
read_timeout=self._aws_read_timeout, # Read timeout in seconds
)

session = boto3.Session(
@@ -93,7 +99,7 @@ def init_async_client(self):
def parse_chat_completion(self, completion):
log.debug(f"completion: {completion}")
try:
data = completion['output']['message']['content'][0]['text']
data = completion["output"]["message"]["content"][0]["text"]
usage = self.track_completion_usage(completion)
return GeneratorOutput(data=None, usage=usage, raw_response=data)
except Exception as e:
@@ -104,18 +110,18 @@ def parse_chat_completion(self, completion):

def track_completion_usage(self, completion: Dict) -> CompletionUsage:
r"""Track the completion usage."""
usage = completion['usage']
usage = completion["usage"]
return CompletionUsage(
completion_tokens=usage['outputTokens'],
prompt_tokens=usage['inputTokens'],
total_tokens=usage['totalTokens']
completion_tokens=usage["outputTokens"],
prompt_tokens=usage["inputTokens"],
total_tokens=usage["totalTokens"],
)

def convert_inputs_to_api_kwargs(
self,
input: Optional[Any] = None,
model_kwargs: Dict = {},
model_type: ModelType = ModelType.UNDEFINED
self,
input: Optional[Any] = None,
model_kwargs: Dict = {},
model_type: ModelType = ModelType.UNDEFINED,
):
"""
check the converse api doc here:
@@ -133,11 +139,11 @@ def convert_inputs_to_api_kwargs(
@backoff.on_exception(
backoff.expo,
(
bedrock_runtime_exceptions.ThrottlingException,
bedrock_runtime_exceptions.ModelTimeoutException,
bedrock_runtime_exceptions.InternalServerException,
bedrock_runtime_exceptions.ModelErrorException,
bedrock_runtime_exceptions.ValidationException
bedrock_runtime_exceptions.ThrottlingException,
bedrock_runtime_exceptions.ModelTimeoutException,
bedrock_runtime_exceptions.InternalServerException,
bedrock_runtime_exceptions.ModelErrorException,
bedrock_runtime_exceptions.ValidationException,
),
max_time=5,
)
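
For context, a minimal usage sketch of the client configured above. The `ModelType` import path, the `call` method name, and the `modelId` value are assumptions rather than details shown in this diff.

from adalflow.components.model_client.bedrock_client import BedrockAPIClient
from adalflow.core.types import ModelType

# Credentials and region fall back to the AWS_* environment variables when omitted.
client = BedrockAPIClient(
    aws_region_name="us-east-1",
    aws_connection_timeout=10,  # seconds, forwarded to botocore Config(connect_timeout=...)
    aws_read_timeout=60,        # seconds, forwarded to botocore Config(read_timeout=...)
)

# Build kwargs for the Bedrock converse API; the model id is only an example.
api_kwargs = client.convert_inputs_to_api_kwargs(
    input="Summarize the AdalFlow project in one sentence.",
    model_kwargs={"modelId": "anthropic.claude-3-haiku-20240307-v1:0"},
    model_type=ModelType.LLM,
)
response = client.call(api_kwargs=api_kwargs, model_type=ModelType.LLM)
print(client.parse_chat_completion(response).raw_response)  # GeneratorOutput.raw_response holds the text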
2 changes: 1 addition & 1 deletion adalflow/adalflow/optim/optimizer.py
@@ -67,7 +67,7 @@ def __init__(
dataset: Sequence[DataClass] = None,
exclude_input_fields_from_bootstrap_demos: bool = False,
*args,
**kwargs
**kwargs,
):
self._weighted = weighted
self.dataset = dataset
2 changes: 1 addition & 1 deletion adalflow/adalflow/utils/lazy_import.py
@@ -78,7 +78,7 @@ class LazyImport:
"""

def __init__(
self, import_path: str, optional_package: OptionalPackages, *args, **kwargs
self, import_path: str, optional_package: OptionalPackages, *args, **kwargs
):
if args or kwargs:
raise TypeError(
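
A small usage sketch of the guard in this signature: extra positional or keyword arguments are rejected at construction time. The `FAISS` member name of `OptionalPackages` is an assumption here.

from adalflow.utils.lazy_import import LazyImport, OptionalPackages

faiss_mod = LazyImport("faiss", OptionalPackages.FAISS)  # import is deferred, faiss is not loaded yet
LazyImport("faiss", OptionalPackages.FAISS, "extra")     # raises TypeError per the check above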
4 changes: 2 additions & 2 deletions benchmarks/README.md
@@ -1,3 +1,3 @@
Benchmarking is an integral development part of the project.
Benchmarking is an integral development part of the project.

Contributors are encouraged to write benchmarks for their code, besides of the unit tests in `tests/` directory.
Contributors are encouraged to write benchmarks for their code, besides of the unit tests in `tests/` directory.
@@ -499,4 +499,4 @@
"answer": "grand assembly",
"type": "bridge"
}
]
]
2 changes: 1 addition & 1 deletion benchmarks/ReAct_agent/paper_data/paper_dev_10.json
@@ -429,4 +429,4 @@
]
]
}
]
]
54 changes: 34 additions & 20 deletions benchmarks/ReAct_agent/utils/tools.py
@@ -9,15 +9,17 @@
Apply the similar code for wikipedia search from the Paper (open-source).
"""


# copy code from the paper
def clean_str(p):
return p.encode().decode("unicode-escape").encode("latin1").decode("utf-8")
return p.encode().decode("unicode-escape").encode("latin1").decode("utf-8")


# normalization copied from the paper's code
def normalize_answer(s):
def remove_articles(text):
return re.sub(r"\b(a|an|the)\b", " ", text)

def white_space_fix(text):
return " ".join(text.split())

@@ -39,58 +41,70 @@ def search(entity: str) -> str:
# Format the entity for URL encoding
entity_formatted = entity.replace(" ", "+")
url = f"https://en.wikipedia.org/w/index.php?search={entity_formatted}"

# Fetch the page
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
soup = BeautifulSoup(response.text, "html.parser")

# Check if the exact page was found or suggest similar items
# when <div class=mw-search-result-heading> is detected, it means the entity page is not found on wikipedia
result_divs = soup.find_all("div", {"class": "mw-search-result-heading"})

if result_divs: # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities

if (
result_divs
): # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities
# get Similar results
similar_titles = [div.a.get_text() for div in result_divs]
return f"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}" # return the top 5 similar titles
return f"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}" # return the top 5 similar titles
else:
# the paper uses page to represent content in <p>
# Extract xontent
page_list = [p.get_text().strip() for p in soup.find_all("p") + soup.find_all("ul")]
page_list = [
p.get_text().strip() for p in soup.find_all("p") + soup.find_all("ul")
]
# TODO: Recursive search, if find any concept that needs more search then call search again
# if any("may refer to:" in p for p in page_list):
# search(entity)

# restructure & clean the page content following the paper's logic
page = ''
page = ""
for p in page_list:
if len(p.split(" ")) > 2:
page += clean_str(p)
if not p.endswith("\n"):
page += "\n"
paragraphs = page.split("\n")
paragraphs = [p.strip() for p in paragraphs if p.strip()]

sentences = []
for p in paragraphs:
sentences += p.split('. ')
sentences = [s.strip() + '.' for s in sentences if s.strip()]
sentences += p.split(". ")
sentences = [s.strip() + "." for s in sentences if s.strip()]

# return the first 5 sentences
if sentences:
return ' '.join(sentences[:5]) if len(sentences)>=5 else ' '.join(sentences)
return (
" ".join(sentences[:5]) if len(sentences) >= 5 else " ".join(sentences)
)
else:
return "No content found on this page."

# TODO: clean the paragraphs and return the searched content


def lookup(text: str, keyword: str) -> str:
"""
returns the sentences containing keyword in the current passage.
returns the sentences containing keyword in the current passage.
"""
sentences = text.split('.')
matching_sentences = [sentence.strip() + '.' for sentence in sentences if keyword.lower() in sentence.lower()]
sentences = text.split(".")
matching_sentences = [
sentence.strip() + "."
for sentence in sentences
if keyword.lower() in sentence.lower()
]
if not matching_sentences:
return "No sentences found with the keyword."
else:
return ' '.join(matching_sentences) # Join all matching sentences into a single string
return " ".join(
matching_sentences
) # Join all matching sentences into a single string
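
A quick illustration of the two helpers above (not taken from the repo's tests); note that search performs a live request against Wikipedia.

page = search("Python (programming language)")
print(page)  # first five sentences of the article, or similar-title suggestions

snippet = lookup(
    "Python is dynamically typed. It supports multiple paradigms. Ruby is different.",
    "paradigms",
)
print(snippet)  # -> "It supports multiple paradigms."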
2 changes: 1 addition & 1 deletion benchmarks/hotpot_qa/adal_exp/train_vanilla.py
@@ -114,7 +114,7 @@ def train(
**gpt_3_model,
teacher_model_config=gpt_4o_model,
text_optimizer_model_config=gpt_4o_model,
backward_engine_model_config=gpt_4o_model
backward_engine_model_config=gpt_4o_model,
)
print(adal_component)
trainer = adal.Trainer(
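
For reference, `gpt_3_model` and `gpt_4o_model` in these train scripts are the usual AdalFlow model-config dicts. The exact keys and the `OpenAIClient` import path below are assumptions based on that convention, not shown in this diff.

from adalflow.components.model_client.openai_client import OpenAIClient

gpt_3_model = {
    "model_client": OpenAIClient(),
    "model_kwargs": {"model": "gpt-3.5-turbo", "temperature": 0.0},
}
gpt_4o_model = {
    "model_client": OpenAIClient(),
    "model_kwargs": {"model": "gpt-4o", "temperature": 0.0},
}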
2 changes: 1 addition & 1 deletion docs/Makefile
@@ -8,7 +8,7 @@ SOURCEDIR = source
BUILDDIR = build
APIDOCOUTDIR = $(SOURCEDIR)/apis
PYTHON := $(shell command -v python3 2>/dev/null || command -v python 2>/dev/null)
POETRY = poetry
POETRY = poetry

# Put it first so that "make" without argument is like "make help".
help:
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -8,4 +8,4 @@ PyYAML
readthedocs-sphinx-search==0.3.2
numpy
tqdm
tiktoken
tiktoken
2 changes: 1 addition & 1 deletion docs/source/_static/css/custom.css
@@ -344,4 +344,4 @@ table tr:hover {
font-size: 8px;
/* Further adjust text size for smallest screens */
}
}
}
8 changes: 4 additions & 4 deletions docs/source/contributor/version_control.rst
@@ -7,7 +7,7 @@ Overview
--------
**The version will mainly be managed by the LightRAG team. But we are glad to share how we will release the latest version here.**

This guide outlines the process for releasing a new version of ``LightRAG``.
This guide outlines the process for releasing a new version of ``LightRAG``.
The workflow pipeline validates the version tag, builds the package, runs tests, publishes to PyPI, and creates a release on GitHub. The workflow is triggered by tags pushed to the **Release** branch. See `GitHub tags <https://docs.github.com/en/desktop/managing-commits/managing-tags-in-github-desktop>`_ for more details on version release tagging.

Steps to Release a New Version
@@ -18,7 +18,7 @@ Steps to Release a New Version
[tool.poetry]
name = "lightrag"
version = "0.0.0-rc.1"
description = "The 'PyTorch' library for LLM applications. RAG=Retriever-Agent-Generator."
@@ -49,7 +49,7 @@ Steps to Release a New Version
git add lightrag/pyproject.toml
git commit -m "new version release"
git push origin release
Since the workflow only processes **tags**, your file submission will not go through the version release workflow.

Only the tags you pushed will get checked.
@@ -66,7 +66,7 @@ Steps to Release a New Version
.. code-block:: python
git tags # list the existing tags
git tag -d <tag>
git push origin --delete <tag>
4 changes: 0 additions & 4 deletions docs/source/resources/resources.rst
@@ -3,7 +3,3 @@ Resources

Please check the GitHub for more information:
`GitHub repository <https://github.com/SylphAI-Inc/LightRAG>`_




2 changes: 1 addition & 1 deletion use_cases/classification/train.py
@@ -126,7 +126,7 @@ def train(
debug=False,
max_steps=12,
strategy="constrained",
optimization_order="sequential"
optimization_order="sequential",
)
# val 0.694 -> 0.833, #test 0.8472 -> 0.833, adding more shots does not help
# NOTE: raw: 40, bootstrap: 4, max_steps: 8, strategy: random, val: 86.1, test: 86.8 (+4.2% compared with dspy)
2 changes: 1 addition & 1 deletion use_cases/question_answering/bbh/object_count/train_new.py
@@ -111,7 +111,7 @@ def train(
**gpt_3_model,
teacher_model_config=gpt_4o_model,
text_optimizer_model_config=gpt_4o_model,
backward_engine_model_config=gpt_4o_model
backward_engine_model_config=gpt_4o_model,
)
print(adal_component)
trainer = adal.Trainer(
