perf: update the class name used to fetch a user's unique ID on x
Johnserf-Seed committed Jan 20, 2025
1 parent 3333f7b commit f0cc3a9
Showing 2 changed files with 21 additions and 16 deletions.
8 changes: 4 additions & 4 deletions f2/apps/twitter/handler.py
@@ -25,7 +25,7 @@
    BookmarkTweetFilter,
)
from f2.apps.twitter.utils import (
    UserIdFetcher,
    UniqueIdFetcher,
    TweetIdFetcher,
    create_or_rename_user_folder,
)
@@ -180,7 +180,7 @@ async def handle_post_tweet(self):
        page_counts = self.kwargs.get("page_counts", 20)
        max_counts = self.kwargs.get("max_counts")

        uniqueID = await UserIdFetcher.get_user_id(self.kwargs.get("url"))
        uniqueID = await UniqueIdFetcher.get_unique_id(self.kwargs.get("url"))
        user = await self.fetch_user_profile(uniqueID)

        async with AsyncUserDB("twitter_users.db") as udb:
@@ -275,7 +275,7 @@ async def handle_like_tweet(self):
        page_counts = self.kwargs.get("page_counts", 20)
        max_counts = self.kwargs.get("max_counts")

        uniqueID = await UserIdFetcher.get_user_id(self.kwargs.get("url"))
        uniqueID = await UniqueIdFetcher.get_unique_id(self.kwargs.get("url"))
        user = await self.fetch_user_profile(uniqueID)

        async with AsyncUserDB("twitter_users.db") as udb:
@@ -373,7 +373,7 @@ async def handle_bookmark_tweet(self):
        page_counts = self.kwargs.get("page_counts", 20)
        max_counts = self.kwargs.get("max_counts")

        uniqueID = await UserIdFetcher.get_user_id(self.kwargs.get("url"))
        uniqueID = await UniqueIdFetcher.get_unique_id(self.kwargs.get("url"))

        async with AsyncUserDB("twitter_users.db") as udb:
            user_path = await self.get_or_add_user_data(self.kwargs, uniqueID, udb)
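For callers of the handler, the change is a straight rename: UserIdFetcher.get_user_id becomes UniqueIdFetcher.get_unique_id, taking the same profile/tweet URL and awaiting the screen name as a string. A minimal call-site sketch, assuming the f2 package is installed; the wrapper function and the exception import path are illustrative assumptions, not part of this commit:

import asyncio

from f2.apps.twitter.utils import UniqueIdFetcher
from f2.exceptions.api_exceptions import APINotFoundError  # assumed import path


async def resolve_unique_id(url: str) -> str | None:
    """Return the screen name for a profile/tweet URL, or None if it cannot be parsed."""
    try:
        # Same call pattern as the handlers above.
        return await UniqueIdFetcher.get_unique_id(url)
    except APINotFoundError:
        # Raised when the URL is not a twitter.com / x.com link or has no screen name.
        return None


if __name__ == "__main__":
    print(asyncio.run(resolve_unique_id("https://x.com/CaroylnG61544")))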
29 changes: 17 additions & 12 deletions f2/apps/twitter/utils.py
@@ -82,20 +82,20 @@ def model_2_endpoint(
    return final_endpoint


class UserIdFetcher:
class UniqueIdFetcher(BaseCrawler):
    # https://x.com/CaroylnG61544
    # https://x.com/CaroylnG61544/
    # https://x.com/CaroylnG61544/followers
    # https://x.com/CaroylnG61544/status/1440000000000000000
    # https://twitter.com/CaroylnG61544/status/1440000000000000000/photo/1

    # 预编译正则表达式
    _USER_ID_PATTERN = re.compile(
    _UNIQUE_ID_PATTERN = re.compile(
        r"(?:https?://)?(?:www\.)?(twitter\.com|x\.com)/(?:@)?([a-zA-Z0-9_]+)"
    )

    @classmethod
    async def get_user_id(cls, url: str) -> str:
    async def get_unique_id(cls, url: str) -> str:
        """
        从用户URL中提取用户ID
        (Extract user ID from user URL)
@@ -104,7 +104,7 @@ async def get_user_id(cls, url: str) -> str:
            url (str): 用户URL (User URL)

        Returns:
            str: 用户ID (User ID)
            str: 用户唯一ID (User Unique Id)
        """

        if not isinstance(url, str):
@@ -113,28 +113,33 @@ async def get_user_id(cls, url: str) -> str:
        # 提取有效URL
        url = extract_valid_urls(url)

        match = cls._USER_ID_PATTERN.search(url)
        if url is None:
            raise (
                APINotFoundError(_("输入的URL不合法。类名:{0}").format(cls.__name__))
            )

        match = cls._UNIQUE_ID_PATTERN.search(url)

        if match:
            return match.group(2)
        else:
            raise APINotFoundError(
                _(
                    "未在响应的地址中找到user_id,检查链接是否为用户链接。类名:{0}"
                    "未在响应的地址中找到unique_id,检查链接是否为用户链接。类名:{0}"
                ).format(cls.__name__)
            )

    @classmethod
    async def get_all_user_ids(cls, urls: list) -> list:
    async def get_all_unique_ids(cls, urls: list) -> list:
        """
        从用户URL列表中提取所有用户ID
        (Extract all user IDs from the list of user URLs)
        从用户URL列表中提取所有用户唯一ID
        (Extract all unique ids from the list of user URLs)

        Args:
            urls (list): 用户URL列表 (List of user URLs)

        Returns:
            list: 用户ID列表 (List of user IDs)
            list: 用户唯一ID列表 (List of unique ids)
        """

        if not isinstance(urls, list):
@@ -151,8 +156,8 @@ async def get_all_user_ids(cls, urls: list) -> list:
                )
            )

        user_ids = [cls.get_user_id(url) for url in urls]
        return await asyncio.gather(*user_ids)
        unique_ids = [cls.get_unique_id(url) for url in urls]
        return await asyncio.gather(*unique_ids)


class TweetIdFetcher(BaseCrawler):
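A minimal usage sketch of the renamed utils API, reusing the example URLs from the class comment above; the printed outputs are inferred from the second capture group of _UNIQUE_ID_PATTERN rather than taken from the repository's tests:

import asyncio

from f2.apps.twitter.utils import UniqueIdFetcher


async def main():
    # Single URL: the screen name is the second capture group of _UNIQUE_ID_PATTERN.
    unique_id = await UniqueIdFetcher.get_unique_id(
        "https://x.com/CaroylnG61544/status/1440000000000000000"
    )
    print(unique_id)  # CaroylnG61544

    # Batch: get_all_unique_ids fans the per-URL coroutines out via asyncio.gather.
    unique_ids = await UniqueIdFetcher.get_all_unique_ids(
        [
            "https://x.com/CaroylnG61544",
            "https://twitter.com/CaroylnG61544/followers",
        ]
    )
    print(unique_ids)  # ['CaroylnG61544', 'CaroylnG61544']


if __name__ == "__main__":
    asyncio.run(main())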
