Skip to content

Commit 99e8fcd

Browse files
committed
Porting the facebook submodule to tabular records
1 parent 719aa4f commit 99e8fcd

File tree

8 files changed, with 70 additions and 99 deletions.

minet/cli/facebook/comments.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77
from minet.cli.utils import with_enricher_and_loading_bar
88
from minet.cli.facebook.utils import with_facebook_fatal_errors
99
from minet.facebook import FacebookMobileScraper
10-
from minet.facebook.constants import FACEBOOK_COMMENT_CSV_HEADERS
10+
from minet.facebook.types import MobileFacebookComment
1111
from minet.facebook.exceptions import FacebookInvalidTargetError
1212

1313

1414
@with_facebook_fatal_errors
1515
@with_enricher_and_loading_bar(
16-
headers=FACEBOOK_COMMENT_CSV_HEADERS,
16+
headers=MobileFacebookComment,
1717
title="Scraping comments",
1818
unit="posts",
1919
nested=True,

minet/cli/facebook/post.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@
1010
print_translation_warning_if_needed,
1111
)
1212
from minet.facebook import FacebookMobileScraper
13-
from minet.facebook.constants import (
14-
FACEBOOK_POST_WITH_REACTIONS_CSV_HEADERS,
15-
)
13+
from minet.facebook.types import MobileFacebookPostWithReactions
1614
from minet.facebook.exceptions import (
1715
FacebookInvalidTargetError,
1816
FacebookNotPostError,
@@ -28,7 +26,7 @@
2826

2927
@with_facebook_fatal_errors
3028
@with_enricher_and_loading_bar(
31-
headers=FACEBOOK_POST_WITH_REACTIONS_CSV_HEADERS,
29+
headers=MobileFacebookPostWithReactions,
3230
title="Scraping posts",
3331
unit="posts",
3432
)

minet/cli/facebook/post_authors.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77
from minet.cli.utils import with_enricher_and_loading_bar
88
from minet.cli.facebook.utils import with_facebook_fatal_errors
99
from minet.facebook import FacebookMobileScraper
10-
from minet.facebook.constants import FACEBOOK_USER_CSV_HEADERS
10+
from minet.facebook.types import MobileFacebookUser
1111
from minet.facebook.exceptions import FacebookInvalidTargetError
1212

1313

1414
@with_facebook_fatal_errors
1515
@with_enricher_and_loading_bar(
16-
headers=FACEBOOK_USER_CSV_HEADERS, title="Finding authors", unit="posts"
16+
headers=MobileFacebookUser, title="Finding authors", unit="posts"
1717
)
1818
def action(cli_args, enricher, loading_bar):
1919
scraper = FacebookMobileScraper(cli_args.cookie, throttle=cli_args.throttle)

minet/cli/facebook/posts.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@
1010
print_translation_warning_if_needed,
1111
)
1212
from minet.facebook import FacebookMobileScraper
13-
from minet.facebook.constants import FACEBOOK_POST_CSV_HEADERS
13+
from minet.facebook.types import MobileFacebookPost
1414
from minet.facebook.exceptions import FacebookInvalidTargetError
1515

1616

1717
@with_facebook_fatal_errors
1818
@with_enricher_and_loading_bar(
19-
headers=FACEBOOK_POST_CSV_HEADERS,
19+
headers=MobileFacebookPost,
2020
title="Scraping group posts",
2121
unit="groups",
2222
nested=True,

minet/facebook/constants.py

-54
Original file line numberDiff line numberDiff line change
@@ -19,56 +19,6 @@
1919
)
2020
FACEBOOK_WEB_RATE_LIMITER_STATE = RateLimiterState(1, FACEBOOK_WEB_DEFAULT_THROTTLE)
2121

22-
FACEBOOK_USER_CSV_HEADERS = ["user_label", "user_id", "user_handle", "user_url"]
23-
24-
FACEBOOK_COMMENT_CSV_HEADERS = [
25-
"post_id",
26-
"id",
27-
"user_id",
28-
"user_handle",
29-
"user_url",
30-
"user_label",
31-
"text",
32-
"html",
33-
"formatted_date",
34-
"date",
35-
"reactions",
36-
"replies",
37-
"in_reply_to",
38-
]
39-
40-
FACEBOOK_POST_CSV_HEADERS = [
41-
"url",
42-
"user_id",
43-
"user_handle",
44-
"user_url",
45-
"user_label",
46-
"text",
47-
"html",
48-
"translated_text",
49-
"translated_html",
50-
"translated_from",
51-
"formatted_date",
52-
"date",
53-
"reactions",
54-
"comments",
55-
]
56-
57-
FACEBOOK_POST_STATS_CSV_HEADERS = [
58-
"error",
59-
"canonical",
60-
"account_name",
61-
"timestamp",
62-
"time",
63-
"link",
64-
"aria_label",
65-
"text",
66-
"share_count",
67-
"comment_count",
68-
"reaction_count",
69-
"video_view_count",
70-
]
71-
7222
FACEBOOK_REACTION_KEYS = OrderedDict(
7323
{
7424
1: "like",
@@ -82,7 +32,3 @@
8232
16: "care",
8333
}
8434
)
85-
86-
FACEBOOK_POST_WITH_REACTIONS_CSV_HEADERS = FACEBOOK_POST_CSV_HEADERS + [
87-
"reactions_types"
88-
]

minet/facebook/formatters.py

-21
This file was deleted.

minet/facebook/mobile_scraper.py

+14-14
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@
3737
from minet.scrape.utils import BeautifulSoupWithoutXHTMLWarnings
3838
from minet.scrape.std import get_display_text
3939
from minet.facebook.utils import grab_facebook_cookie
40-
from minet.facebook.formatters import (
41-
FacebookComment,
42-
FacebookPost,
43-
FacebookUser,
44-
FacebookPostWithReaction,
40+
from minet.facebook.types import (
41+
MobileFacebookComment,
42+
MobileFacebookUser,
43+
MobileFacebookPost,
44+
MobileFacebookPostWithReactions,
4545
)
4646
from minet.facebook.exceptions import (
4747
FacebookInvalidCookieError,
@@ -80,7 +80,7 @@ def resolve_relative_url(url):
8080
return urljoin(FACEBOOK_MOBILE_URL, url)
8181

8282

83-
def scrape_comments(html, direction=None, in_reply_to=None):
83+
def scrape_comments(html, direction=None, in_reply_to=None) -> MobileFacebookComment:
8484
soup = BeautifulSoupWithoutXHTMLWarnings(html, "lxml")
8585

8686
data = {
@@ -139,7 +139,7 @@ def scrape_comments(html, direction=None, in_reply_to=None):
139139
)
140140

141141
for item in valid_items:
142-
item_id = item.get("id")
142+
item_id = item["id"]
143143

144144
# Skipping comment if same as commented
145145
if item_id == in_reply_to:
@@ -206,7 +206,7 @@ def scrape_comments(html, direction=None, in_reply_to=None):
206206
data["replies"].append((resolve_relative_url(replies_url), item_id))
207207

208208
data["comments"].append(
209-
FacebookComment(
209+
MobileFacebookComment(
210210
post_id=post_id,
211211
id=item_id,
212212
user_id=getattr(user, "id", ""),
@@ -302,7 +302,7 @@ def scrape_posts(html):
302302
else None
303303
)
304304

305-
post = FacebookPost(
305+
post = MobileFacebookPost(
306306
url=post_url,
307307
user_id=getattr(user, "id", ""),
308308
user_handle=getattr(user, "handle", ""),
@@ -401,7 +401,7 @@ def scrape_video(soup):
401401
else None
402402
)
403403

404-
post = FacebookPostWithReaction(
404+
post = MobileFacebookPostWithReactions(
405405
url=video_url,
406406
user_id=getattr(user, "id", ""),
407407
user_handle=getattr(user, "handle", ""),
@@ -492,7 +492,7 @@ def scrape_photo(soup):
492492
else None
493493
)
494494

495-
post = FacebookPostWithReaction(
495+
post = MobileFacebookPostWithReactions(
496496
url=photo_url,
497497
user_id=getattr(user, "id", ""),
498498
user_handle=getattr(user, "handle", ""),
@@ -597,7 +597,7 @@ def scrape_post(html):
597597
else None
598598
)
599599

600-
post = FacebookPostWithReaction(
600+
post = MobileFacebookPostWithReactions(
601601
url=post_url,
602602
user_id=getattr(user, "id", ""),
603603
user_handle=getattr(user, "handle", ""),
@@ -765,8 +765,8 @@ def post_author(self, url):
765765
user_label = user_item.get_text().strip()
766766

767767
if isinstance(parsed, ParsedFacebookHandle):
768-
return FacebookUser(user_label, None, parsed.handle, parsed.url)
768+
return MobileFacebookUser(user_label, None, parsed.handle, parsed.url)
769769
elif isinstance(parsed, ParsedFacebookUser):
770-
return FacebookUser(user_label, parsed.id, parsed.handle, parsed.url)
770+
return MobileFacebookUser(user_label, parsed.id, parsed.handle, parsed.url)
771771
else:
772772
raise TypeError

minet/facebook/types.py

+48
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,51 @@ def walk(entries: Iterable[Tuple["FacebookComment", List["FacebookComment"]]]):
111111
walk(filter(lambda entry: entry[0].depth == 0, index.values()))
112112

113113
return sorted_comments
114+
115+
116+
@dataclass
117+
class MobileFacebookComment(TabularRecord):
118+
post_id: str
119+
id: str
120+
user_id: str
121+
user_handle: str
122+
user_url: str
123+
user_label: str
124+
text: str
125+
html: str
126+
formatted_date: str
127+
date: Optional[str]
128+
reactions: str
129+
replies: str
130+
in_reply_to: Optional[str]
131+
132+
133+
@dataclass
134+
class MobileFacebookUser(TabularRecord):
135+
label: str
136+
id: Optional[str]
137+
handle: Optional[str]
138+
url: str
139+
140+
141+
@dataclass
142+
class MobileFacebookPost(TabularRecord):
143+
url: str
144+
user_id: str
145+
user_handle: str
146+
user_url: str
147+
user_label: str
148+
text: str
149+
html: str
150+
translated_text: Optional[str]
151+
translated_html: Optional[str]
152+
translated_from: Optional[str]
153+
formatted_date: str
154+
date: Optional[str]
155+
reactions: str
156+
comments: str
157+
158+
159+
@dataclass
160+
class MobileFacebookPostWithReactions(MobileFacebookPost):
161+
reactions_types: Optional[str]

0 commit comments

Comments
 (0)