-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Closed
Description
# Reproduction script from the bug report: build a SmartScraperGraph that
# uses a locally hosted Ollama server for both the LLM and the embedder,
# then run it against a Douban book-list page.
from scrapegraphai.graphs import SmartScraperGraph

# Graph configuration: both the LLM and the embeddings model point at the
# default local Ollama endpoint (port 11434).
graph_config2 = {
"llm": {
"model": "ollama/llama3",
"temperature": 0,
"format": "json",
"base_url": "http://localhost:11434",
},
"embeddings": {
"model": "ollama/nomic-embed-text",
"base_url": "http://localhost:11434",
},
"verbose": True,
}

# Build the scraper from a natural-language prompt, the target URL, and
# the Ollama-backed config above.
smart_scraper_graph3 = SmartScraperGraph(
prompt="Return the names, author names, ratings, and book links of all books on this page",
source="https://book.douban.com/top250",
config=graph_config2
)

# Running the graph is what raises the traceback pasted below (the token
# counter falls back to downloading the 'gpt2' tokenizer and fails with
# an OSError — see the end of the report).
result3 = smart_scraper_graph3.run()
print(result3)

Traceback (most recent call last):
result3 = smart_scraper_graph3.run()
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\scrapegraphai\graphs\smart_scraper_graph.py", line 183, in run
self.final_state, self.execution_info = self.graph.execute(inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\scrapegraphai\graphs\base_graph.py", line 281, in execute
return self._execute_standard(initial_state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\scrapegraphai\graphs\base_graph.py", line 197, in _execute_standard
raise e
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\scrapegraphai\graphs\base_graph.py", line 181, in _execute_standard
result = current_node.execute(state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\scrapegraphai\nodes\parse_node.py", line 83, in execute
chunks = split_text_into_chunks(text=docs_transformed.page_content,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\scrapegraphai\utils\split_text_into_chunks.py", line 28, in split_text_into_chunks
chunks = chunk(text=text,
^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\semchunk\semchunk.py", line 129, in chunk
if token_counter(split) > chunk_size:
^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\scrapegraphai\utils\split_text_into_chunks.py", line 24, in count_tokens
return num_tokens_calculus(text, model)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\scrapegraphai\utils\tokenizer.py", line 30, in num_tokens_calculus
num_tokens = num_tokens_fn(string, llm_model)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\scrapegraphai\utils\tokenizers\tokenizer_ollama.py", line 26, in num_tokens_ollama
tokens = llm_model.get_num_tokens(text)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\langchain_core\language_models\base.py", line 365, in get_num_tokens
return len(self.get_token_ids(text))
^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\langchain_core\language_models\base.py", line 352, in get_token_ids
return _get_token_ids_default_method(text)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\langchain_core\language_models\base.py", line 76, in _get_token_ids_default_method
tokenizer = get_tokenizer()
^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\langchain_core\language_models\base.py", line 70, in get_tokenizer
return GPT2TokenizerFast.from_pretrained("gpt2")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "D:\anaconda3\envs\scrapegraphai\Lib\site-packages\transformers\tokenization_utils_base.py", line 2192, in from_pretrained
raise EnvironmentError(
OSError: Can't load tokenizer for 'gpt2'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'gpt2' is the correct path to a directory containing all relevant files for a GPT2TokenizerFast tokenizer.
source="https://book.douban.com/top250",
config=graph_config2
)
result3 = smart_scraper_graph3.run()
print(result3)