Fix issues identified by pre-commit hooks
fm1320 committed Nov 22, 2024
1 parent 79b0b2e commit 3196631
Showing 17 changed files with 88 additions and 72 deletions.
4 changes: 2 additions & 2 deletions adalflow/PACKAGING.md
@@ -33,10 +33,10 @@ pip install "dist/adalflow-0.1.0b1-py3-none-any.whl[openai,groq,faiss]"

1. Update the version in `pyproject.toml`
2. Add the version number in `adalflow/__init__.py`
3. Build the package
3. Build the package
4. Test the package locally
5. Push the changes to the repository
6. Ensure to run `poetry lock --no-update` in the root directory (project-level) to update the lock file for other directories such as `tutorials`, `use_cases`, `benchmarks`, etc.
7. Update the `CHANGELOG.md` file with the new version number and the changes made in the new version.

## TODO: we need to automate the version update process. Help is appreciated.
## TODO: we need to automate the version update process. Help is appreciated.
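
Pending that automation, a minimal pre-release check could assert that the two version declarations agree. This is only a sketch: it assumes the version lives under `[tool.poetry]` in `adalflow/pyproject.toml` and that `adalflow/__init__.py` exposes a `__version__` attribute.

import tomllib  # Python 3.11+; older interpreters can use the third-party "tomli" package

import adalflow


def check_version_sync(pyproject_path: str = "adalflow/pyproject.toml") -> None:
    """Fail fast if pyproject.toml and adalflow.__version__ disagree."""
    with open(pyproject_path, "rb") as f:
        declared = tomllib.load(f)["tool"]["poetry"]["version"]
    if declared != adalflow.__version__:
        raise RuntimeError(
            f"pyproject.toml declares {declared}, "
            f"but adalflow.__version__ is {adalflow.__version__}"
        )


if __name__ == "__main__":
    check_version_sync()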
@@ -167,4 +167,4 @@ async def acall(
elif model_type == ModelType.LLM:
return await self.async_client.messages.create(**api_kwargs)
else:
raise ValueError(f"model_type {model_type} is not supported")
raise ValueError(f"model_type {model_type} is not supported")
64 changes: 35 additions & 29 deletions adalflow/adalflow/components/model_client/bedrock_client.py
@@ -15,17 +15,21 @@

bedrock_runtime_exceptions = boto3.client(
service_name="bedrock-runtime",
region_name=os.getenv("AWS_REGION_NAME", "us-east-1")
region_name=os.getenv("AWS_REGION_NAME", "us-east-1"),
).exceptions


def get_first_message_content(completion: Dict) -> str:
r"""When we only need the content of the first message.
It is the default parser for chat completion."""
return completion['output']['message']['content'][0]['text']
return completion["output"]["message"]["content"][0]["text"]


__all__ = ["BedrockAPIClient", "get_first_message_content", "bedrock_runtime_exceptions"]
__all__ = [
"BedrockAPIClient",
"get_first_message_content",
"bedrock_runtime_exceptions",
]


class BedrockAPIClient(ModelClient):
@@ -34,15 +38,15 @@ class BedrockAPIClient(ModelClient):
"""

def __init__(
self,
aws_profile_name=None,
aws_region_name=None,
aws_access_key_id=None,
aws_secret_access_key=None,
aws_session_token=None,
aws_connection_timeout=None,
aws_read_timeout=None,
chat_completion_parser: Callable = None,
self,
aws_profile_name=None,
aws_region_name=None,
aws_access_key_id=None,
aws_secret_access_key=None,
aws_session_token=None,
aws_connection_timeout=None,
aws_read_timeout=None,
chat_completion_parser: Callable = None,
):
super().__init__()
self._aws_profile_name = aws_profile_name
@@ -56,7 +60,7 @@ def __init__(
self.session = None
self.sync_client = self.init_sync_client()
self.chat_completion_parser = (
chat_completion_parser or get_first_message_content
chat_completion_parser or get_first_message_content
)

def init_sync_client(self):
@@ -67,14 +71,16 @@ def init_sync_client(self):
aws_profile_name = self._aws_profile_name or os.getenv("AWS_PROFILE_NAME")
aws_region_name = self._aws_region_name or os.getenv("AWS_REGION_NAME")
aws_access_key_id = self._aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_access_key = self._aws_secret_access_key or os.getenv("AWS_SECRET_ACCESS_KEY")
aws_secret_access_key = self._aws_secret_access_key or os.getenv(
"AWS_SECRET_ACCESS_KEY"
)
aws_session_token = self._aws_session_token or os.getenv("AWS_SESSION_TOKEN")

config = None
if self._aws_connection_timeout or self._aws_read_timeout:
config = Config(
connect_timeout=self._aws_connection_timeout, # Connection timeout in seconds
read_timeout=self._aws_read_timeout # Read timeout in seconds
read_timeout=self._aws_read_timeout, # Read timeout in seconds
)

session = boto3.Session(
@@ -93,7 +99,7 @@ def init_async_client(self):
def parse_chat_completion(self, completion):
log.debug(f"completion: {completion}")
try:
data = completion['output']['message']['content'][0]['text']
data = completion["output"]["message"]["content"][0]["text"]
usage = self.track_completion_usage(completion)
return GeneratorOutput(data=None, usage=usage, raw_response=data)
except Exception as e:
@@ -104,18 +110,18 @@ def parse_chat_completion(self, completion):

def track_completion_usage(self, completion: Dict) -> CompletionUsage:
r"""Track the completion usage."""
usage = completion['usage']
usage = completion["usage"]
return CompletionUsage(
completion_tokens=usage['outputTokens'],
prompt_tokens=usage['inputTokens'],
total_tokens=usage['totalTokens']
completion_tokens=usage["outputTokens"],
prompt_tokens=usage["inputTokens"],
total_tokens=usage["totalTokens"],
)

def convert_inputs_to_api_kwargs(
self,
input: Optional[Any] = None,
model_kwargs: Dict = {},
model_type: ModelType = ModelType.UNDEFINED
self,
input: Optional[Any] = None,
model_kwargs: Dict = {},
model_type: ModelType = ModelType.UNDEFINED,
):
"""
check the converse api doc here:
@@ -133,11 +139,11 @@ def convert_inputs_to_api_kwargs(
@backoff.on_exception(
backoff.expo,
(
bedrock_runtime_exceptions.ThrottlingException,
bedrock_runtime_exceptions.ModelTimeoutException,
bedrock_runtime_exceptions.InternalServerException,
bedrock_runtime_exceptions.ModelErrorException,
bedrock_runtime_exceptions.ValidationException
bedrock_runtime_exceptions.ThrottlingException,
bedrock_runtime_exceptions.ModelTimeoutException,
bedrock_runtime_exceptions.InternalServerException,
bedrock_runtime_exceptions.ModelErrorException,
bedrock_runtime_exceptions.ValidationException,
),
max_time=5,
)
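
For context, a minimal usage sketch of the client configured above. The `ModelType` import path, the `call` method name, and the `modelId` value are assumptions rather than details shown in this diff.

from adalflow.components.model_client.bedrock_client import BedrockAPIClient
from adalflow.core.types import ModelType

# Credentials and region fall back to the AWS_* environment variables when omitted.
client = BedrockAPIClient(
    aws_region_name="us-east-1",
    aws_connection_timeout=10,  # seconds, forwarded to botocore Config(connect_timeout=...)
    aws_read_timeout=60,        # seconds, forwarded to botocore Config(read_timeout=...)
)

# Build kwargs for the Bedrock converse API; the model id is only an example.
api_kwargs = client.convert_inputs_to_api_kwargs(
    input="Summarize the AdalFlow project in one sentence.",
    model_kwargs={"modelId": "anthropic.claude-3-haiku-20240307-v1:0"},
    model_type=ModelType.LLM,
)
response = client.call(api_kwargs=api_kwargs, model_type=ModelType.LLM)
print(client.parse_chat_completion(response).raw_response)  # GeneratorOutput.raw_response holds the text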
2 changes: 1 addition & 1 deletion adalflow/adalflow/optim/optimizer.py
@@ -67,7 +67,7 @@ def __init__(
dataset: Sequence[DataClass] = None,
exclude_input_fields_from_bootstrap_demos: bool = False,
*args,
**kwargs
**kwargs,
):
self._weighted = weighted
self.dataset = dataset
2 changes: 1 addition & 1 deletion adalflow/adalflow/utils/lazy_import.py
@@ -78,7 +78,7 @@ class LazyImport:
"""

def __init__(
self, import_path: str, optional_package: OptionalPackages, *args, **kwargs
self, import_path: str, optional_package: OptionalPackages, *args, **kwargs
):
if args or kwargs:
raise TypeError(
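
A small usage sketch of the guard in this signature: extra positional or keyword arguments are rejected at construction time. The `FAISS` member name of `OptionalPackages` is an assumption here.

from adalflow.utils.lazy_import import LazyImport, OptionalPackages

faiss_mod = LazyImport("faiss", OptionalPackages.FAISS)  # import is deferred, faiss is not loaded yet
LazyImport("faiss", OptionalPackages.FAISS, "extra")     # raises TypeError per the check above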
4 changes: 2 additions & 2 deletions benchmarks/README.md
@@ -1,3 +1,3 @@
Benchmarking is an integral development part of the project.
Benchmarking is an integral development part of the project.

Contributors are encouraged to write benchmarks for their code, besides of the unit tests in `tests/` directory.
Contributors are encouraged to write benchmarks for their code, besides of the unit tests in `tests/` directory.
@@ -499,4 +499,4 @@
"answer": "grand assembly",
"type": "bridge"
}
]
]
2 changes: 1 addition & 1 deletion benchmarks/ReAct_agent/paper_data/paper_dev_10.json
@@ -429,4 +429,4 @@
]
]
}
]
]
54 changes: 34 additions & 20 deletions benchmarks/ReAct_agent/utils/tools.py
@@ -9,15 +9,17 @@
Apply the similar code for wikipedia search from the Paper (open-source).
"""


# copy code from the paper
def clean_str(p):
return p.encode().decode("unicode-escape").encode("latin1").decode("utf-8")
return p.encode().decode("unicode-escape").encode("latin1").decode("utf-8")


# normalization copied from the paper's code
def normalize_answer(s):
def remove_articles(text):
return re.sub(r"\b(a|an|the)\b", " ", text)

def white_space_fix(text):
return " ".join(text.split())

@@ -39,58 +41,70 @@ def search(entity: str) -> str:
# Format the entity for URL encoding
entity_formatted = entity.replace(" ", "+")
url = f"https://en.wikipedia.org/w/index.php?search={entity_formatted}"

# Fetch the page
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
soup = BeautifulSoup(response.text, "html.parser")

# Check if the exact page was found or suggest similar items
# when <div class=mw-search-result-heading> is detected, it means the entity page is not found on wikipedia
result_divs = soup.find_all("div", {"class": "mw-search-result-heading"})

if result_divs: # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities

if (
result_divs
): # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities
# get Similar results
similar_titles = [div.a.get_text() for div in result_divs]
return f"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}" # return the top 5 similar titles
return f"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}" # return the top 5 similar titles
else:
# the paper uses page to represent content in <p>
# Extract xontent
page_list = [p.get_text().strip() for p in soup.find_all("p") + soup.find_all("ul")]
page_list = [
p.get_text().strip() for p in soup.find_all("p") + soup.find_all("ul")
]
# TODO: Recursive search, if find any concept that needs more search then call search again
# if any("may refer to:" in p for p in page_list):
# search(entity)

# restructure & clean the page content following the paper's logic
page = ''
page = ""
for p in page_list:
if len(p.split(" ")) > 2:
page += clean_str(p)
if not p.endswith("\n"):
page += "\n"
paragraphs = page.split("\n")
paragraphs = [p.strip() for p in paragraphs if p.strip()]

sentences = []
for p in paragraphs:
sentences += p.split('. ')
sentences = [s.strip() + '.' for s in sentences if s.strip()]
sentences += p.split(". ")
sentences = [s.strip() + "." for s in sentences if s.strip()]

# return the first 5 sentences
if sentences:
return ' '.join(sentences[:5]) if len(sentences)>=5 else ' '.join(sentences)
return (
" ".join(sentences[:5]) if len(sentences) >= 5 else " ".join(sentences)
)
else:
return "No content found on this page."

# TODO: clean the paragraphs and return the searched content


def lookup(text: str, keyword: str) -> str:
"""
returns the sentences containing keyword in the current passage.
returns the sentences containing keyword in the current passage.
"""
sentences = text.split('.')
matching_sentences = [sentence.strip() + '.' for sentence in sentences if keyword.lower() in sentence.lower()]
sentences = text.split(".")
matching_sentences = [
sentence.strip() + "."
for sentence in sentences
if keyword.lower() in sentence.lower()
]
if not matching_sentences:
return "No sentences found with the keyword."
else:
return ' '.join(matching_sentences) # Join all matching sentences into a single string
return " ".join(
matching_sentences
) # Join all matching sentences into a single string
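
A quick illustration of the two helpers above (not taken from the repo's tests); note that search performs a live request against Wikipedia.

page = search("Python (programming language)")
print(page)  # first five sentences of the article, or similar-title suggestions

snippet = lookup(
    "Python is dynamically typed. It supports multiple paradigms. Ruby is different.",
    "paradigms",
)
print(snippet)  # -> "It supports multiple paradigms."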
2 changes: 1 addition & 1 deletion benchmarks/hotpot_qa/adal_exp/train_vanilla.py
@@ -114,7 +114,7 @@ def train(
**gpt_3_model,
teacher_model_config=gpt_4o_model,
text_optimizer_model_config=gpt_4o_model,
backward_engine_model_config=gpt_4o_model
backward_engine_model_config=gpt_4o_model,
)
print(adal_component)
trainer = adal.Trainer(
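
For reference, `gpt_3_model` and `gpt_4o_model` in these train scripts are the usual AdalFlow model-config dicts. The exact keys and the `OpenAIClient` import path below are assumptions based on that convention, not shown in this diff.

from adalflow.components.model_client.openai_client import OpenAIClient

gpt_3_model = {
    "model_client": OpenAIClient(),
    "model_kwargs": {"model": "gpt-3.5-turbo", "temperature": 0.0},
}
gpt_4o_model = {
    "model_client": OpenAIClient(),
    "model_kwargs": {"model": "gpt-4o", "temperature": 0.0},
}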
2 changes: 1 addition & 1 deletion docs/Makefile
@@ -8,7 +8,7 @@ SOURCEDIR = source
BUILDDIR = build
APIDOCOUTDIR = $(SOURCEDIR)/apis
PYTHON := $(shell command -v python3 2>/dev/null || command -v python 2>/dev/null)
POETRY = poetry
POETRY = poetry

# Put it first so that "make" without argument is like "make help".
help:
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -8,4 +8,4 @@ PyYAML
readthedocs-sphinx-search==0.3.2
numpy
tqdm
tiktoken
tiktoken
2 changes: 1 addition & 1 deletion docs/source/_static/css/custom.css
@@ -344,4 +344,4 @@ table tr:hover {
font-size: 8px;
/* Further adjust text size for smallest screens */
}
}
}
8 changes: 4 additions & 4 deletions docs/source/contributor/version_control.rst
@@ -7,7 +7,7 @@ Overview
--------
**The version will mainly be managed by the LightRAG team. But we are glad to share how we will release the latest version here.**

This guide outlines the process for releasing a new version of ``LightRAG``.
This guide outlines the process for releasing a new version of ``LightRAG``.
The workflow pipeline validates the version tag, builds the package, runs tests, publishes to PyPI, and creates a release on GitHub. The workflow is triggered by tags pushed to the **Release** branch. See `GitHub tags <https://docs.github.com/en/desktop/managing-commits/managing-tags-in-github-desktop>`_ for more details on version release tagging.

Steps to Release a New Version
@@ -18,7 +18,7 @@ Steps to Release a New Version
[tool.poetry]
name = "lightrag"
version = "0.0.0-rc.1"
description = "The 'PyTorch' library for LLM applications. RAG=Retriever-Agent-Generator."
@@ -49,7 +49,7 @@ Steps to Release a New Version
git add lightrag/pyproject.toml
git commit -m "new version release"
git push origin release
Since the workflow only processes **tags**, your file submission will not go through the version release workflow.

Only the tags you pushed will get checked.
@@ -66,7 +66,7 @@ Steps to Release a New Version
.. code-block:: python
git tags # list the existing tags
git tag -d <tag>
git push origin --delete <tag>
4 changes: 0 additions & 4 deletions docs/source/resources/resources.rst
@@ -3,7 +3,3 @@ Resources

Please check the GitHub for more information:
`GitHub repository <https://github.com/SylphAI-Inc/LightRAG>`_




2 changes: 1 addition & 1 deletion use_cases/classification/train.py
@@ -126,7 +126,7 @@ def train(
debug=False,
max_steps=12,
strategy="constrained",
optimization_order="sequential"
optimization_order="sequential",
)
# val 0.694 -> 0.833, #test 0.8472 -> 0.833, adding more shots does not help
# NOTE: raw: 40, bootstrap: 4, max_steps: 8, strategy: random, val: 86.1, test: 86.8 (+4.2% compared with dspy)
2 changes: 1 addition & 1 deletion use_cases/question_answering/bbh/object_count/train_new.py
@@ -111,7 +111,7 @@ def train(
**gpt_3_model,
teacher_model_config=gpt_4o_model,
text_optimizer_model_config=gpt_4o_model,
backward_engine_model_config=gpt_4o_model
backward_engine_model_config=gpt_4o_model,
)
print(adal_component)
trainer = adal.Trainer(
