From bdef6dc4384d03dab4be6f0e873fa5fe82ccbbd1 Mon Sep 17 00:00:00 2001
From: camillechanial
Date: Thu, 22 Feb 2024 10:51:19 +0100
Subject: [PATCH] added scraping of FB users places lived

---
Notes (not part of the commit message):
- Line breaks and indentation were rebuilt from a whitespace-collapsed copy of
  this patch. The indentation of context lines is a best guess, so
  `git apply --ignore-whitespace` may be needed.
- The blob ids on the `index` lines still describe the original submission.

 ftest/facebook_user_places_lived.py     | 36 +++++++++++++++++++++++++
 minet/cli/facebook/__init__.py          | 18 +++++++++++++
 minet/cli/facebook/user_places_lived.py | 29 ++++++++++++++++++++
 minet/facebook/mobile_scraper.py        | 29 ++++++++++++++++++++
 minet/facebook/types.py                 |  7 +++++
 5 files changed, 119 insertions(+)
 create mode 100644 ftest/facebook_user_places_lived.py
 create mode 100644 minet/cli/facebook/user_places_lived.py

diff --git a/ftest/facebook_user_places_lived.py b/ftest/facebook_user_places_lived.py
new file mode 100644
index 0000000000..073602b24c
--- /dev/null
+++ b/ftest/facebook_user_places_lived.py
@@ -0,0 +1,36 @@
+# Functional test printing the places lived scraped for a few Facebook users.
+# The scraper is created with the "firefox" cookie source.
+from minet.facebook import FacebookMobileScraper
+
+USERS_URL = [
+    "https://www.facebook.com/cyr.esseh",
+    "https://www.facebook.com/akim.malonga",
+    "https://www.facebook.com/sarah.matoko",
+    "https://www.facebook.com/chrismonick",
+    "https://www.facebook.com/mavie.coeurmbeye",
+    "https://www.facebook.com/clement.tsith",
+    # "https://www.facebook.com/johnny.levey.5",
+    # "https://www.facebook.com/profile.php?id=100004392515502",
+    # "https://www.facebook.com/blaidynson",
+    # "https://www.facebook.com/fabien.Cydel",
+    # "https://www.facebook.com/xavierdestaing.baboueya",
+    # "https://www.facebook.com/beaugarel.malonga",
+    # "https://www.facebook.com/cedric.mabiala.714",
+    # "https://www.facebook.com/gracedaisy.londa",
+    # "https://www.facebook.com/aichath.tidjani.35",
+    # "https://www.facebook.com/profile.php?id=100022219963045",
+    # "https://www.facebook.com/zadkiel.esuszico",
+    # "https://www.facebook.com/claude.bikoulou.7",
+    # "https://www.facebook.com/sheila.mabiala.1",
+]
+
+
+def main():
+    scraper = FacebookMobileScraper(cookie="firefox")
+
+    for url in USERS_URL:
+        print(scraper.user_places_lived_info(url))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/minet/cli/facebook/__init__.py b/minet/cli/facebook/__init__.py
index 1b561cdb4f..0e162da6b7 100644
--- a/minet/cli/facebook/__init__.py
+++ b/minet/cli/facebook/__init__.py
@@ -247,6 +247,23 @@
             $ minet fb url-likes url -i url.csv > url_likes.csv
     """,
     variadic_input={"dummy_column": "url", "item_label": "url"},
+)
+
+FACEBOOK_USER_PLACES_LIVED_SUBCOMMAND = command(
+    "user-places-lived",
+    "minet.cli.facebook.user_places_lived",
+    title="Minet Facebook User Places Lived Command",
+    description="""
+        Retrieve the hometown and current city of a given Facebook user.
+    """,
+    epilog="""
+        Examples:
+
+        . Fetching places lived of a series of users in a CSV file:
+            $ minet fb user-places-lived user_url -i fb-users.csv > places-lived.csv
+    """,
+    variadic_input={"dummy_column": "user_url", "item_label": "user"},
+    arguments=[*MOBILE_ARGUMENTS],
 )
 
 FACEBOOK_COMMAND = command(
@@ -265,5 +282,6 @@
         FACEBOOK_POST_SUBCOMMAND,
         FACEBOOK_POSTS_SUBCOMMAND,
         FACEBOOK_URL_LIKES_SUBCOMMAND,
+        FACEBOOK_USER_PLACES_LIVED_SUBCOMMAND,
     ],
 )
diff --git a/minet/cli/facebook/user_places_lived.py b/minet/cli/facebook/user_places_lived.py
new file mode 100644
index 0000000000..6f4db0bd31
--- /dev/null
+++ b/minet/cli/facebook/user_places_lived.py
@@ -0,0 +1,29 @@
+# =============================================================================
+# Minet Facebook User Places Lived CLI Action
+# =============================================================================
+#
+# Logic of the `fb user-places-lived` action.
+#
+from minet.cli.utils import with_enricher_and_loading_bar
+from minet.cli.facebook.utils import with_facebook_fatal_errors
+from minet.facebook import FacebookMobileScraper
+from minet.facebook.types import MobileFacebookUserPlacesLived
+
+
+@with_facebook_fatal_errors
+@with_enricher_and_loading_bar(
+    headers=MobileFacebookUserPlacesLived, title="Finding places lived", unit="users"
+)
+def action(cli_args, enricher, loading_bar):
+    """Write each input row enriched with the user's hometown and current city."""
+    scraper = FacebookMobileScraper(cli_args.cookie, throttle=cli_args.throttle)
+
+    for _, row, user_url in enricher.enumerate_cells(
+        cli_args.column, with_rows=True, start=1
+    ):
+        with loading_bar.step():
+            places_lived = scraper.user_places_lived_info(user_url)
+
+            # Rows must only go through the enricher: printing them as well
+            # would mix debug output into the CSV written by the command.
+            enricher.writerow(row, places_lived.as_csv_row())
diff --git a/minet/facebook/mobile_scraper.py b/minet/facebook/mobile_scraper.py
index 229bb09874..f431959ff6 100644
--- a/minet/facebook/mobile_scraper.py
+++ b/minet/facebook/mobile_scraper.py
@@ -42,6 +42,7 @@
     MobileFacebookUser,
     MobileFacebookPost,
     MobileFacebookPostWithReactions,
+    MobileFacebookUserPlacesLived,
 )
 from minet.facebook.exceptions import (
     FacebookInvalidCookieError,
@@ -770,3 +771,31 @@ def post_author(self, url):
             return MobileFacebookUser(user_label, parsed.id, parsed.handle, parsed.url)
         else:
             raise TypeError
+
+    def user_places_lived_info(self, url):
+        """Scrape the hometown and current city listed on a user's profile.
+
+        Returns a MobileFacebookUserPlacesLived whose fields are None when the
+        matching place could not be found on the page.
+        """
+        url = convert_url_to_mobile(url)
+
+        html = self.request_page(url)
+        soup = BeautifulSoupWithoutXHTMLWarnings(html, "lxml")
+
+        def find_place(label):
+            # NOTE: labels are matched on their English text, so this presumably
+            # needs the page to be served in English (to be confirmed).
+            label_element = soup.find("span", string=label)
+
+            if label_element is None:
+                return None
+
+            # The place name sits next to the label's grandparent element.
+            place_element = label_element.parent.parent.next_sibling
+
+            return place_element.text if place_element is not None else None
+
+        return MobileFacebookUserPlacesLived(
+            find_place("Hometown"), find_place("Current city")
+        )
diff --git a/minet/facebook/types.py b/minet/facebook/types.py
index 011c9d74cb..49346e80b9 100644
--- a/minet/facebook/types.py
+++ b/minet/facebook/types.py
@@ -137,6 +137,13 @@ class MobileFacebookUser(TabularRecord):
     handle: Optional[str]
     url: str
 
 
+# Hometown and current city scraped from a user's profile, None when not found.
+@dataclass
+class MobileFacebookUserPlacesLived(TabularRecord):
+    hometown: Optional[str]
+    current_city: Optional[str]
+
+
 @dataclass
 class MobileFacebookPost(TabularRecord):