Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Integrate video reading #883

Merged
merged 42 commits into from
Nov 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
5b28730
Run through the basic functions
Aug 31, 2024
eaad632
complete the video_toolkit
Aug 31, 2024
1dbcb57
added the forced download function and added test files
Aug 31, 2024
07c61bd
chore: Refactor VideoDownloaderToolkit to improve code readability an…
Sep 3, 2024
aa95c22
feat: Add video screenshot capture functionality
Sep 3, 2024
845cd00
feat: Add web video description extractor example
Sep 3, 2024
85d5dce
feat: Add web video object recognition example
Sep 3, 2024
76d9151
feat: Add DuckDuckGo video object recognition example
Sep 6, 2024
b842f45
feat: Add DuckDuckGo video object recognition results
Sep 6, 2024
0e501d9
refactor: update VideoDownloaderToolkit class and add download direct…
Sep 10, 2024
68f4d52
fix: Change test file directory
Sep 11, 2024
a3a7119
refactor: Update .gitignore and test_video_function.py
Sep 11, 2024
7f1ce25
fix: Fix minor bugs and format issues
Sep 11, 2024
0e3ad4f
polish docstring format
Wendong-Fan Sep 12, 2024
f35c1ee
refactor: Update VideoDownloaderToolkit class and Removed the split_i…
Sep 12, 2024
88db014
refactor: Update video_toolkit.py and test_video_function.py
Sep 13, 2024
157c76c
refactor: Update utils/__init__.py and test_video_function.py
Sep 13, 2024
bca0eeb
refactor: Update VideoDownloaderToolkit class
Sep 13, 2024
e1eda38
Merge remote-tracking branch 'origin/master' into integrate-video-rea…
Sep 13, 2024
3c0c3bb
Refactor poetry.lock
Sep 13, 2024
2c9d137
Add yt-dlp to optional dependencies
Sep 13, 2024
75a804f
fix: Refactor VideoDownloaderToolkit class and update utils/__init__.…
Sep 14, 2024
89e71a9
fix: VideoDownloaderToolkit examples
Sep 19, 2024
ff2e5e6
refactor
Sep 23, 2024
710a474
refactor: Remove the functionality of specifying a URL in the downloa…
Sep 23, 2024
d0c24bb
refactor: Remove URL parameter from VideoDownloaderToolkit constructo…
Sep 23, 2024
cadb897
refactor: Format
Sep 23, 2024
10884d0
fix
Sep 23, 2024
3463537
merge master
WHALEEYE Nov 8, 2024
22dd1fa
refactor codes
WHALEEYE Nov 9, 2024
55c3ab0
Merge branch 'master' into integrate-video-reading
WHALEEYE Nov 9, 2024
05d8683
small fix
WHALEEYE Nov 9, 2024
39d4ea3
test: change test to mocks
WHALEEYE Nov 16, 2024
d7ecc96
merge master
WHALEEYE Nov 16, 2024
b372d30
Merge branch 'master' into integrate-video-reading
Wendong-Fan Nov 22, 2024
bb868dd
polish based on review
Wendong-Fan Nov 22, 2024
ee5a803
Merge branch 'master' into integrate-video-reading
WHALEEYE Nov 23, 2024
bc10497
update examples
WHALEEYE Nov 24, 2024
852bd7b
`test` folder naming
Wendong-Fan Nov 24, 2024
9bded21
pre commit
Wendong-Fan Nov 24, 2024
3847346
update
Wendong-Fan Nov 24, 2024
df4b1ec
update test
Wendong-Fan Nov 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -416,10 +416,14 @@ tags
# Camel
logs/

# Download Configuration
cookies.txt

# Data
datasets_test/
evaluation_data/
camel_data/
temp_files/

# Benchmark
benchmark/gaia/Dataset
Expand Down
2 changes: 2 additions & 0 deletions camel/toolkits/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from .retrieval_toolkit import RetrievalToolkit
from .notion_toolkit import NotionToolkit
from .human_toolkit import HumanToolkit
from .video_toolkit import VideoDownloaderToolkit

__all__ = [
'BaseToolkit',
Expand Down Expand Up @@ -66,4 +67,5 @@
'NotionToolkit',
'ArxivToolkit',
'HumanToolkit',
'VideoDownloaderToolkit',
]
211 changes: 211 additions & 0 deletions camel/toolkits/video_toolkit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========

import io
import logging
import re
import tempfile
from pathlib import Path
from typing import List, Optional

from PIL import Image

from camel.toolkits.base import BaseToolkit
from camel.toolkits.function_tool import FunctionTool
from camel.utils import dependencies_required

logger = logging.getLogger(__name__)


def _standardize_url(url: str) -> str:
r"""Standardize the given URL."""
# Special case for YouTube embed URLs
if "youtube.com/embed/" in url:
match = re.search(r"embed/([a-zA-Z0-9_-]+)", url)
if match:
return f"https://www.youtube.com/watch?v={match.group(1)}"
else:
raise ValueError(f"Invalid YouTube URL: {url}")

return url


def _capture_screenshot(video_file: str, timestamp: float) -> Image.Image:
    r"""Capture a screenshot from a video file at a specific timestamp.

    A single frame is extracted with ffmpeg, scaled to a width of 320 pixels
    (height follows the aspect ratio), encoded as PNG and piped back through
    stdout.

    Args:
        video_file (str): The path to the video file.
        timestamp (float): The time in seconds from which to capture the
            screenshot.

    Returns:
        Image.Image: The captured screenshot in the form of Image.Image.

    Raises:
        RuntimeError: If ffmpeg fails to extract the frame. The original
            ``ffmpeg.Error`` is attached as the cause.
    """
    import ffmpeg

    try:
        out, _ = (
            ffmpeg.input(video_file, ss=timestamp)
            .filter('scale', 320, -1)
            .output('pipe:', vframes=1, format='image2', vcodec='png')
            .run(capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        # stderr is captured as raw bytes; decode it so the message is
        # readable instead of a ``b'...'`` repr.
        stderr = e.stderr
        if isinstance(stderr, bytes):
            stderr = stderr.decode(errors="replace")
        raise RuntimeError(f"Failed to capture screenshot: {stderr}") from e

    return Image.open(io.BytesIO(out))


class VideoDownloaderToolkit(BaseToolkit):
    r"""A toolkit for downloading videos and capturing screenshots from
    them.

    Args:
        download_directory (Optional[str], optional): The directory where the
            video will be downloaded to. If not provided, video will be stored
            in a temporary directory and will be cleaned up after use.
            (default: :obj:`None`)
        cookies_path (Optional[str], optional): The path to the cookies file
            for the video service in Netscape format. (default: :obj:`None`)
    """

    @dependencies_required("yt_dlp", "ffmpeg")
    def __init__(
        self,
        download_directory: Optional[str] = None,
        cookies_path: Optional[str] = None,
    ) -> None:
        # An empty string falls back to a temporary directory exactly like
        # ``None`` does (see the ``or`` below), so it has to be cleaned up
        # as well.
        self._cleanup = not download_directory
        self._cookies_path = cookies_path

        self._download_directory = Path(
            download_directory or tempfile.mkdtemp()
        ).resolve()

        try:
            self._download_directory.mkdir(parents=True, exist_ok=True)
        except FileExistsError as e:
            # The path exists but is not a directory.
            raise ValueError(
                f"{self._download_directory} is not a valid directory."
            ) from e
        except OSError as e:
            raise ValueError(
                f"Error creating directory {self._download_directory}: {e}"
            ) from e

        logger.info(f"Video will be downloaded to {self._download_directory}")

    def __del__(self) -> None:
        r"""Destructor for the VideoDownloaderToolkit class.

        Cleans up the downloaded videos if they are stored in a temporary
        directory.
        """
        import shutil

        # ``__init__`` may have raised before these attributes were set, so
        # read them defensively instead of failing inside the destructor.
        directory = getattr(self, "_download_directory", None)
        if getattr(self, "_cleanup", False) and directory is not None:
            shutil.rmtree(directory, ignore_errors=True)

    def _download_video(self, url: str) -> str:
        r"""Download the video into the download directory.

        yt-dlp will detect if the video is downloaded automatically so there
        is no need to check if the video exists.

        Args:
            url (str): The URL of the video to download.

        Returns:
            str: The path to the downloaded video file.

        Raises:
            RuntimeError: If yt-dlp fails to download the video.
        """
        import yt_dlp

        video_template = self._download_directory / "%(title)s.%(ext)s"
        ydl_opts = {
            'format': 'bestvideo+bestaudio/best',
            'outtmpl': str(video_template),
            'force_generic_extractor': True,
            'cookiefile': self._cookies_path,
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                # Download the video and get the filename
                logger.info(f"Downloading video from {url}...")
                info = ydl.extract_info(url, download=True)
                return ydl.prepare_filename(info)
        except yt_dlp.utils.DownloadError as e:
            raise RuntimeError(
                f"Failed to download video from {url}: {e}"
            ) from e

    def get_video_bytes(
        self,
        video_url: str,
    ) -> bytes:
        r"""Download video by the URL, and return the content in bytes.

        Args:
            video_url (str): The URL of the video to download.

        Returns:
            bytes: The video file content in bytes.
        """
        url = _standardize_url(video_url)
        video_file = self._download_video(url)

        with open(video_file, 'rb') as f:
            video_bytes = f.read()

        return video_bytes

    def get_video_screenshots(
        self, video_url: str, amount: int
    ) -> List[Image.Image]:
        r"""Capture screenshots from the video by dividing it into equal
        parts and taking one screenshot at each interior division point.

        Args:
            video_url (str): The URL of the video to take screenshots.
            amount (int): the amount of evenly split screenshots to capture.

        Returns:
            List[Image.Image]: A list of screenshots as Image.Image.

        Raises:
            ValueError: If ``amount`` is negative.
            RuntimeError: If the video cannot be downloaded or its length
                cannot be determined.
        """
        import ffmpeg

        # Reject nonsensical input up front; ``amount == -1`` would
        # otherwise divide by zero after the video was already downloaded.
        if amount < 0:
            raise ValueError(
                f"`amount` must be a non-negative integer, got {amount}."
            )

        url = _standardize_url(video_url)
        video_file = self._download_video(url)

        # Get the video length
        try:
            probe = ffmpeg.probe(video_file)
            video_length = float(probe['format']['duration'])
        except ffmpeg.Error as e:
            stderr = e.stderr
            if isinstance(stderr, bytes):
                stderr = stderr.decode(errors="replace")
            raise RuntimeError(
                f"Failed to determine video length: {stderr}"
            ) from e
        except (KeyError, TypeError, ValueError) as e:
            # The probe succeeded but reported no usable duration.
            raise RuntimeError(
                "Failed to determine video length: no valid duration "
                f"reported for {video_file}"
            ) from e

        # Split the video into ``amount + 1`` equal parts and sample at the
        # boundaries between them, which skips the very start and end.
        interval = video_length / (amount + 1)
        timestamps = [i * interval for i in range(1, amount + 1)]

        images = [_capture_screenshot(video_file, ts) for ts in timestamps]

        return images

    def get_tools(self) -> List[FunctionTool]:
        r"""Returns a list of FunctionTool objects representing the
        functions in the toolkit.

        Returns:
            List[FunctionTool]: A list of FunctionTool objects representing
                the functions in the toolkit.
        """
        return [
            FunctionTool(self.get_video_bytes),
            FunctionTool(self.get_video_screenshots),
        ]
110 changes: 110 additions & 0 deletions examples/vision/duckduckgo_video_object_recognition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import List

from PIL import Image

from camel.agents import ChatAgent
from camel.messages import BaseMessage
from camel.prompts import PromptTemplateGenerator
from camel.toolkits import SearchToolkit, VideoDownloaderToolkit
from camel.types import RoleType, TaskType


def detect_image_obj(image_list: List[Image.Image]) -> None:
    r"""Ask a chat agent to list the objects in the given images and print
    both the system prompt and the agent's answer.

    Args:
        image_list (List[Image.Image]): The images to run object recognition
            on.
    """
    bar = "=" * 20

    recognition_prompt = PromptTemplateGenerator().get_prompt_from_key(
        TaskType.OBJECT_RECOGNITION, RoleType.ASSISTANT
    )
    print(bar + " SYS MSG " + bar)
    print(recognition_prompt)
    print("=" * 49)
    recognizer = ChatAgent(recognition_prompt)

    request = BaseMessage.make_user_message(
        role_name="User",
        content="Please start the object detection for the following images!",
        image_list=image_list,
        image_detail="high",
    )

    reply = recognizer.step(request)
    print(bar + " RESULT " + bar)
    print(reply.msgs[0].content)
    print("=" * 48)


def main():
    r"""Search DuckDuckGo for videos and run object recognition on
    screenshots of the first result that can be downloaded.

    Results without an embed URL, and videos that fail to download or to
    yield screenshots, are skipped so the remaining results still get tried.
    """
    # Create an instance of the SearchToolkit
    search_toolkit = SearchToolkit()

    # Example query for DuckDuckGo video search
    query = "The future of AI in education"

    # Perform a DuckDuckGo search with the query, setting source to 'videos'
    results = search_toolkit.search_duckduckgo(
        query=query, source="videos", max_results=5
    )

    # One toolkit (and therefore one temporary download directory) is
    # enough for all attempts.
    downloader = VideoDownloaderToolkit()

    # Try to download videos from the search results
    for result in results:
        # Not every result is guaranteed to carry an embed URL.
        video_url = result.get('embed_url')
        if not video_url:
            print(f"No valid video URL provided for result: {result}")
            continue

        print(f"Trying to download video from: {video_url}")
        try:
            image_list = downloader.get_video_screenshots(video_url, 3)
        except (RuntimeError, ValueError) as e:
            # The toolkit signals download/probe failures and invalid embed
            # URLs by raising; move on to the next candidate instead of
            # aborting the whole run.
            print(f"Failed to capture screenshots from video: {video_url}")
            print(f"Reason: {e}")
            continue

        if image_list:
            print(
                "Successfully downloaded video and captured screenshots\n"
                f"from: {video_url}"
            )
            detect_image_obj(image_list)
            print("Stopping further downloads as we found valid images.")
            break
        else:
            print(f"Failed to capture screenshots from video: {video_url}")

    print("Exited the video download loop.")


# Run the example only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()

"""
===============================================================================
Successfully downloaded video and captured screenshots
from: https://www.youtube.com/embed/RRMVF0PPqZI?autoplay=1
==================== SYS MSG ====================
You have been assigned an object recognition task.
Your mission is to list all detected objects in following image.
Your output should always be a list of strings starting with `1.`, `2.` etc.
Do not explain yourself or output anything else.
=================================================
==================== RESULT ====================
1. Drone
2. Hangar
3. Person (in uniform)
4. Plants
5. Wall (brick)
6. Table
7. Electrical panels
8. Lights
9. Floor
================================================
Stopping further downloads as we found valid images.
Exited the video download loop.
===============================================================================
"""
Loading
Loading