Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## [1.19.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.18.3...v1.19.0) (2024-09-13)


### Features

* integration of o1 ([5c25da2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c25da2fe64b4b64a00f1879f3d5dcfbf1512848))

## [1.19.0-beta.12](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.19.0-beta.11...v1.19.0-beta.12) (2024-09-14)


Expand All @@ -10,6 +17,7 @@

* added telemetry info ([62912c2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/62912c263ec7144e2d509925593027a60d258672))


## [1.19.0-beta.11](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.19.0-beta.10...v1.19.0-beta.11) (2024-09-13)


Expand Down
2 changes: 1 addition & 1 deletion examples/openai/smart_scraper_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
graph_config = {
"llm": {
"api_key": os.getenv("OPENAI_API_KEY"),
"model": "openai/gpt-4o",
"model": "openai/o1-preview",
},
"verbose": True,
"headless": False,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[project]
name = "scrapegraphai"


version = "1.19.0b12"


description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
authors = [
{ name = "Marco Vinciguerra", email = "[email protected]" },
Expand Down
7 changes: 5 additions & 2 deletions scrapegraphai/helpers/models_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
"gpt-4o-2024-08-06": 128000,
"gpt-4o-2024-05-13": 128000,
"gpt-4o-mini":128000,

"o1-preview":128000,
"o1-mini":128000
},
"azure_openai": {
"gpt-3.5-turbo-0125": 16385,
Expand All @@ -43,7 +44,9 @@
"gpt-4-32k-0613": 32768,
"gpt-4o": 128000,
"gpt-4o-mini":128000,
"chatgpt-4o-latest": 128000
"chatgpt-4o-latest": 128000,
"o1-preview":128000,
"o1-mini":128000
},
"google_genai": {
"gemini-pro": 128000,
Expand Down
53 changes: 38 additions & 15 deletions scrapegraphai/utils/copy.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,39 @@
"""
copy module
"""
import copy
from typing import Any, Dict, Optional
from pydantic.v1 import BaseModel
from typing import Any


class DeepCopyError(Exception):
    """
    Custom exception raised when an object cannot be deep-copied.
    """


def is_boto3_client(obj):
    """
    Tell whether ``obj`` is a botocore-backed client, i.e. an instance of
    ``botocore.client.BaseClient``.

    The lookup goes through ``sys.modules`` instead of an ``import`` so that
    this helper never loads botocore as a side effect: if the module was never
    imported, no client instance can exist and the answer is ``False``.

    Args:
        obj (Any): The object to inspect.

    Returns:
        bool: True if ``obj`` is a ``BaseClient`` instance, False otherwise
        (including when botocore has not been imported).
    """
    import sys

    # Key on the module that actually defines the client base class, not on
    # ``boto3``: clients can be built through botocore alone.
    client_module = sys.modules.get("botocore.client")
    if client_module is None:
        return False

    # A partially initialised module may not expose the class yet.
    base_client = getattr(client_module, "BaseClient", None)
    return isinstance(base_client, type) and isinstance(obj, base_client)


def safe_deepcopy(obj: Any) -> Any:
"""
Attempts to create a deep copy of the object using `copy.deepcopy`
whenever possible. If that fails, it falls back to custom deep copy
logic. If that also fails, it raises a `DeepCopyError`.

Args:
obj (Any): The object to be copied, which can be of any type.

Expand All @@ -27,36 +46,40 @@ def safe_deepcopy(obj: Any) -> Any:
"""

try:

return copy.deepcopy(obj)
except (TypeError, AttributeError) as e:

if isinstance(obj, dict):
new_obj = {}

for k, v in obj.items():
new_obj[k] = safe_deepcopy(v)
return new_obj

elif isinstance(obj, list):
new_obj = []

for v in obj:
new_obj.append(safe_deepcopy(v))
return new_obj

elif isinstance(obj, tuple):
new_obj = tuple(safe_deepcopy(v) for v in obj)

return new_obj

elif isinstance(obj, frozenset):
new_obj = frozenset(safe_deepcopy(v) for v in obj)
return new_obj

elif hasattr(obj, "__dict__"):
elif is_boto3_client(obj):
return obj

else:
try:
return copy.copy(obj)
except (TypeError, AttributeError):
raise DeepCopyError(f"Cannot deep copy the object of type {type(obj)}") from e


try:
return copy.copy(obj)
except (TypeError, AttributeError):
raise DeepCopyError(f"Cannot deep copy the object of type {type(obj)}") from e
raise DeepCopyError(
f"Cannot deep copy the object of type {type(obj)}"
) from e
6 changes: 6 additions & 0 deletions tests/utils/copy_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,9 @@ def test_with_pydantic():
copy_obj = safe_deepcopy(original)
assert copy_obj.value == original.value
assert copy_obj is not original

def test_with_boto3():
    """
    A boto3 client passed through safe_deepcopy must come back as the very
    same object rather than as a copy.
    """
    import boto3

    boto_client = boto3.client("bedrock-runtime", region_name="us-west-2")
    copy_obj = safe_deepcopy(boto_client)
    # Identity, not equality: safe_deepcopy's boto3 branch returns the input
    # itself, so `is` states the expected contract precisely.
    assert copy_obj is boto_client