diff --git a/ftest/facebook_user_infos.py b/ftest/facebook_user_infos.py
new file mode 100644
index 0000000000..b2e5ba74f7
--- /dev/null
+++ b/ftest/facebook_user_infos.py
@@ -0,0 +1,15 @@
+import csv
+import sys
+import time
+
+from tqdm import tqdm
+
+from minet.facebook import FacebookMobileScraper
+
+scraper = FacebookMobileScraper(cookie="firefox")
+
+# Empty as committed: add the profile URLs to scrape here before running.
+USERS_URL = []
+
+for url in USERS_URL:
+    print(scraper.user_infos(url))
diff --git a/minet/cli/facebook/__init__.py b/minet/cli/facebook/__init__.py
index 1b561cdb4f..6a97aa5416 100644
--- a/minet/cli/facebook/__init__.py
+++ b/minet/cli/facebook/__init__.py
@@ -247,6 +247,23 @@
             $ minet fb url-likes url -i url.csv > url_likes.csv
     """,
     variadic_input={"dummy_column": "url", "item_label": "url"},
+)
+
+FACEBOOK_USER_INFOS_SUBCOMMAND = command(
+    "user-infos",
+    "minet.cli.facebook.user_infos",
+    title="Minet Facebook User Infos Command",
+    description="""
+        Retrieve the name, hometown, current city and gender of a given Facebook user.
+    """,
+    epilog="""
+        Examples:
+
+        . Fetching user infos of a series of users in a CSV file:
+            $ minet fb user-infos user_url -i fb-users.csv > user-infos.csv
+    """,
+    variadic_input={"dummy_column": "user_url", "item_label": "user"},
+    arguments=[*MOBILE_ARGUMENTS],
 )
 
 FACEBOOK_COMMAND = command(
@@ -265,5 +282,6 @@
         FACEBOOK_POST_SUBCOMMAND,
         FACEBOOK_POSTS_SUBCOMMAND,
         FACEBOOK_URL_LIKES_SUBCOMMAND,
+        FACEBOOK_USER_INFOS_SUBCOMMAND,
     ],
 )
diff --git a/minet/cli/facebook/user_infos.py b/minet/cli/facebook/user_infos.py
new file mode 100644
index 0000000000..379c998032
--- /dev/null
+++ b/minet/cli/facebook/user_infos.py
@@ -0,0 +1,25 @@
+# =============================================================================
+# Minet Facebook User Infos CLI Action
+# =============================================================================
+#
+# Logic of the `fb user-infos` action.
+#
+from minet.cli.utils import with_enricher_and_loading_bar
+from minet.cli.facebook.utils import with_facebook_fatal_errors
+from minet.facebook import FacebookMobileScraper
+from minet.facebook.types import MobileFacebookUserInfo
+
+@with_facebook_fatal_errors
+@with_enricher_and_loading_bar(
+    headers=MobileFacebookUserInfo, title="Finding user profile infos", unit="users"
+)
+def action(cli_args, enricher, loading_bar):
+    """Append the scraped profile infos of each user URL to its input row."""
+    scraper = FacebookMobileScraper(cli_args.cookie, throttle=cli_args.throttle)
+
+    for i, row, user_url in enricher.enumerate_cells(
+        cli_args.column, with_rows=True, start=1
+    ):
+        with loading_bar.step():
+            user_infos = scraper.user_infos(user_url)
+            enricher.writerow(row, user_infos.as_csv_row() if user_infos is not None else None)
diff --git a/minet/facebook/mobile_scraper.py b/minet/facebook/mobile_scraper.py
index 229bb09874..53698a309f 100644
--- a/minet/facebook/mobile_scraper.py
+++ b/minet/facebook/mobile_scraper.py
@@ -42,6 +42,7 @@
     MobileFacebookUser,
     MobileFacebookPost,
     MobileFacebookPostWithReactions,
+    MobileFacebookUserInfo,
 )
 from minet.facebook.exceptions import (
     FacebookInvalidCookieError,
@@ -770,3 +771,47 @@ def post_author(self, url):
             return MobileFacebookUser(user_label, parsed.id, parsed.handle, parsed.url)
         else:
             raise TypeError
+
+    def user_infos(self, url):
+        """
+        Scrape the profile infos of a Facebook user from the mobile website.
+
+        Args:
+            url (str): URL of the user's profile, converted to its mobile
+                counterpart before being requested.
+
+        Returns:
+            MobileFacebookUserInfo: name, hometown, current city and gender
+                of the user, each of them being None when not found.
+        """
+        url = convert_url_to_mobile(url)
+
+        html = self.request_page(url)
+        soup = BeautifulSoupWithoutXHTMLWarnings(html, "lxml")
+
+        def field_value(label):
+            # The value is read from the node following the grandparent of
+            # the span holding the label.
+            label_tag = soup.find("span", string=label)
+
+            if label_tag is None:
+                return None
+
+            value_tag = label_tag.parent.parent.next_sibling
+
+            # A label without any following node must not crash the scraper.
+            return value_tag.text if value_tag is not None else None
+
+        title_tag = soup.find("title")
+        name = title_tag.text if title_tag is not None else None
+
+        # Presumably the title of Facebook's error page, i.e. not a user name.
+        if name == "Content Not Found":
+            name = None
+
+        return MobileFacebookUserInfo(
+            name,
+            field_value("Hometown"),
+            field_value("Current city"),
+            field_value("Gender"),
+        )
diff --git a/minet/facebook/types.py b/minet/facebook/types.py
index 011c9d74cb..70a05d2e6e 100644
--- a/minet/facebook/types.py
+++ b/minet/facebook/types.py
@@ -137,6 +137,15 @@ class MobileFacebookUser(TabularRecord):
     handle: Optional[str]
     url: str
 
+
+@dataclass
+class MobileFacebookUserInfo(TabularRecord):
+    # Profile infos of a user; a field is None when it could not be scraped.
+    name: Optional[str]
+    hometown: Optional[str]
+    current_city: Optional[str]
+    gender: Optional[str]
+
 
 @dataclass
 class MobileFacebookPost(TabularRecord):