Skip to content

Commit ee6f652

Browse files
committed
Refactoring Instagram submodule to use tabular records
1 parent 5688dd0 commit ee6f652

11 files changed: +427 -510 lines changed

minet/cli/instagram/comments.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,13 @@
99
from minet.cli.utils import with_enricher_and_loading_bar
1010
from minet.cli.instagram.utils import with_instagram_fatal_errors
1111
from minet.instagram import InstagramAPIScraper
12-
from minet.instagram.constants import INSTAGRAM_COMMENT_CSV_HEADERS
12+
from minet.instagram.types import InstagramComment
1313
from minet.instagram.exceptions import InstagramInvalidTargetError
1414

1515

1616
@with_instagram_fatal_errors
1717
@with_enricher_and_loading_bar(
18-
headers=INSTAGRAM_COMMENT_CSV_HEADERS,
18+
headers=InstagramComment,
1919
title="Scraping post comments",
2020
unit="posts",
2121
nested=True,

minet/cli/instagram/hashtag.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,13 @@
99
from minet.cli.utils import with_enricher_and_loading_bar
1010
from minet.cli.instagram.utils import with_instagram_fatal_errors
1111
from minet.instagram import InstagramAPIScraper
12-
from minet.instagram.constants import INSTAGRAM_HASHTAG_POST_CSV_HEADERS
12+
from minet.instagram.types import InstagramHashtagPost
1313
from minet.instagram.exceptions import InstagramHashtagNeverUsedError
1414

1515

1616
@with_instagram_fatal_errors
1717
@with_enricher_and_loading_bar(
18-
headers=INSTAGRAM_HASHTAG_POST_CSV_HEADERS,
18+
headers=InstagramHashtagPost,
1919
title="Scraping posts",
2020
unit="hashtag",
2121
nested=True,
@@ -35,7 +35,7 @@ def action(cli_args, enricher, loading_bar):
3535
generator = islice(generator, cli_args.limit)
3636

3737
for post in generator:
38-
enricher.writerow(row, post.as_csv_row())
38+
enricher.writerow(row, post)
3939
loading_bar.nested_advance()
4040

4141
except InstagramHashtagNeverUsedError:

minet/cli/instagram/post_infos.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -7,13 +7,13 @@
77
from minet.cli.utils import with_enricher_and_loading_bar
88
from minet.cli.instagram.utils import with_instagram_fatal_errors
99
from minet.instagram import InstagramAPIScraper
10-
from minet.instagram.constants import INSTAGRAM_POST_CSV_HEADERS
10+
from minet.instagram.types import InstagramPost
1111
from minet.instagram.exceptions import InstagramInvalidTargetError
1212

1313

1414
@with_instagram_fatal_errors
1515
@with_enricher_and_loading_bar(
16-
headers=INSTAGRAM_POST_CSV_HEADERS, title="Scraping infos", unit="posts"
16+
headers=InstagramPost, title="Scraping infos", unit="posts"
1717
)
1818
def action(cli_args, enricher, loading_bar):
1919
client = InstagramAPIScraper(cookie=cli_args.cookie)
@@ -25,7 +25,7 @@ def action(cli_args, enricher, loading_bar):
2525
try:
2626
result = client.post_infos(user)
2727

28-
enricher.writerow(row, result.as_csv_row())
28+
enricher.writerow(row, result)
2929

3030
except InstagramInvalidTargetError:
3131
loading_bar.print(

minet/cli/instagram/user_followers.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from minet.cli.utils import with_enricher_and_loading_bar
1010
from minet.cli.instagram.utils import with_instagram_fatal_errors
1111
from minet.instagram import InstagramAPIScraper
12-
from minet.instagram.constants import INSTAGRAM_USER_CSV_HEADERS
12+
from minet.instagram.types import InstagramUser
1313
from minet.instagram.exceptions import (
1414
InstagramInvalidTargetError,
1515
InstagramAccountNoFollowError,
@@ -19,7 +19,7 @@
1919

2020
@with_instagram_fatal_errors
2121
@with_enricher_and_loading_bar(
22-
headers=INSTAGRAM_USER_CSV_HEADERS,
22+
headers=InstagramUser,
2323
title="Scraping followers",
2424
unit="users",
2525
nested=True,
@@ -38,8 +38,8 @@ def action(cli_args, enricher, loading_bar):
3838
if cli_args.limit:
3939
generator = islice(generator, cli_args.limit)
4040

41-
for post in generator:
42-
enricher.writerow(row, post.as_csv_row())
41+
for user in generator:
42+
enricher.writerow(row, user)
4343
loading_bar.nested_advance()
4444

4545
except InstagramInvalidTargetError:

minet/cli/instagram/user_following.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from minet.cli.utils import with_enricher_and_loading_bar
1010
from minet.cli.instagram.utils import with_instagram_fatal_errors
1111
from minet.instagram import InstagramAPIScraper
12-
from minet.instagram.constants import INSTAGRAM_USER_CSV_HEADERS
12+
from minet.instagram.types import InstagramUser
1313
from minet.instagram.exceptions import (
1414
InstagramInvalidTargetError,
1515
InstagramPrivateAccountError,
@@ -19,7 +19,7 @@
1919

2020
@with_instagram_fatal_errors
2121
@with_enricher_and_loading_bar(
22-
headers=INSTAGRAM_USER_CSV_HEADERS,
22+
headers=InstagramUser,
2323
title="Scraping followees",
2424
unit="users",
2525
nested=True,
@@ -38,8 +38,8 @@ def action(cli_args, enricher, loading_bar):
3838
if cli_args.limit:
3939
generator = islice(generator, cli_args.limit)
4040

41-
for post in generator:
42-
enricher.writerow(row, post.as_csv_row())
41+
for user in generator:
42+
enricher.writerow(row, user)
4343
loading_bar.nested_advance()
4444

4545
except InstagramInvalidTargetError:

minet/cli/instagram/user_infos.py

+4 -5
Original file line number | Diff line number | Diff line change
@@ -7,13 +7,13 @@
77
from minet.cli.utils import with_enricher_and_loading_bar
88
from minet.cli.instagram.utils import with_instagram_fatal_errors
99
from minet.instagram import InstagramAPIScraper
10-
from minet.instagram.constants import INSTAGRAM_USER_INFO_CSV_HEADERS
10+
from minet.instagram.types import InstagramUserInfo
1111
from minet.instagram.exceptions import InstagramInvalidTargetError
1212

1313

1414
@with_instagram_fatal_errors
1515
@with_enricher_and_loading_bar(
16-
headers=INSTAGRAM_USER_INFO_CSV_HEADERS, title="Scraping infos", unit="users"
16+
headers=InstagramUserInfo, title="Scraping infos", unit="users"
1717
)
1818
def action(cli_args, enricher, loading_bar):
1919
client = InstagramAPIScraper(cookie=cli_args.cookie)
@@ -23,9 +23,8 @@ def action(cli_args, enricher, loading_bar):
2323
):
2424
with loading_bar.step():
2525
try:
26-
result = client.user_infos(user)
27-
28-
enricher.writerow(row, result.as_csv_row())
26+
info = client.user_infos(user)
27+
enricher.writerow(row, info)
2928

3029
except InstagramInvalidTargetError:
3130
loading_bar.print(

minet/cli/instagram/user_posts.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from minet.cli.utils import with_enricher_and_loading_bar
1010
from minet.cli.instagram.utils import with_instagram_fatal_errors
1111
from minet.instagram import InstagramAPIScraper
12-
from minet.instagram.constants import INSTAGRAM_POST_CSV_HEADERS
12+
from minet.instagram.types import InstagramPost
1313
from minet.instagram.exceptions import (
1414
InstagramNoPublicationError,
1515
InstagramPrivateOrNonExistentAccountError,
@@ -19,7 +19,7 @@
1919

2020
@with_instagram_fatal_errors
2121
@with_enricher_and_loading_bar(
22-
headers=INSTAGRAM_POST_CSV_HEADERS,
22+
headers=InstagramPost,
2323
title="Scraping posts",
2424
unit="users",
2525
nested=True,
@@ -39,7 +39,7 @@ def action(cli_args, enricher, loading_bar):
3939
generator = islice(generator, cli_args.limit)
4040

4141
for post in generator:
42-
enricher.writerow(row, post.as_csv_row())
42+
enricher.writerow(row, post)
4343
loading_bar.nested_advance()
4444

4545
except InstagramInvalidTargetError:

minet/instagram/api_scraper.py

+25 -22
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
#
55
# Instagram public API "scraper".
66
#
7+
from typing import Dict, Iterator
8+
79
import json
810
from urllib.parse import quote
911
from ebbe import getpath
@@ -50,12 +52,12 @@
5052
InstagramAccountNoFollowError,
5153
InstagramPrivateAccountError,
5254
)
53-
from minet.instagram.formatters import (
54-
format_comment,
55-
format_hashtag_post,
56-
format_post,
57-
format_user,
58-
format_user_info,
55+
from minet.instagram.types import (
56+
InstagramComment,
57+
InstagramHashtagPost,
58+
InstagramPost,
59+
InstagramUser,
60+
InstagramUserInfo,
5961
)
6062

6163
INSTAGRAM_GRAPHQL_ENDPOINT = "https://www.instagram.com/graphql/query/"
@@ -216,9 +218,10 @@ def __init__(self, cookie="firefox"):
216218

217219
@retrying_method()
218220
def request_json(self, url, magic_token=False):
219-
headers = {"Cookie": self.cookie}
221+
headers: Dict[str, str] = {"Cookie": self.cookie}
220222

221223
if magic_token:
224+
assert self.magic_token is not None
222225
headers["X-IG-App-ID"] = self.magic_token
223226

224227
response = request(
@@ -290,7 +293,7 @@ def get_magic_token(self):
290293
return self.magic_token
291294

292295
@ensure_magic_token
293-
def comments(self, post):
296+
def comments(self, post) -> Iterator[InstagramComment]:
294297
if not INSTAGRAM_ID_PATTERN.match(post):
295298
parsed = parse_instagram_url(post)
296299
if isinstance(parsed, (ParsedInstagramPost, ParsedInstagramReel)):
@@ -341,7 +344,7 @@ def comments(self, post):
341344

342345
already_seen.add(item["pk"])
343346

344-
yield format_comment(item)
347+
yield InstagramComment.from_payload(item)
345348

346349
if item.get("child_comment_count") > 0:
347350
max_id = ""
@@ -359,7 +362,7 @@ def comments(self, post):
359362
children_items = data_comment.get("child_comments")
360363

361364
for children_item in children_items:
362-
yield format_comment(children_item)
365+
yield InstagramComment.from_payload(children_item)
363366

364367
more_available = data_comment.get(
365368
"has_more_tail_child_comments"
@@ -375,7 +378,7 @@ def comments(self, post):
375378
if not min_id:
376379
break
377380

378-
def search_hashtag(self, hashtag):
381+
def search_hashtag(self, hashtag) -> Iterator[InstagramHashtagPost]:
379382
hashtag = hashtag.lstrip("#")
380383
cursor = None
381384

@@ -395,7 +398,7 @@ def search_hashtag(self, hashtag):
395398
edges = data.get("edges")
396399

397400
for edge in edges:
398-
yield format_hashtag_post(edge["node"])
401+
yield InstagramHashtagPost.from_payload(edge["node"])
399402

400403
has_next_page = getpath(data, ["page_info", "has_next_page"])
401404

@@ -405,7 +408,7 @@ def search_hashtag(self, hashtag):
405408
cursor = getpath(data, ["page_info", "end_cursor"])
406409

407410
@ensure_magic_token
408-
def post_infos(self, name):
411+
def post_infos(self, name) -> InstagramPost:
409412
if INSTAGRAM_ID_PATTERN.match(name):
410413
url = forge_post_url_from_id(name)
411414

@@ -427,7 +430,7 @@ def post_infos(self, name):
427430
if not data:
428431
raise InstagramInvalidTargetError
429432

430-
return format_post(getpath(data, ["items", 0]))
433+
return InstagramPost.from_payload(getpath(data, ["items", 0]))
431434

432435
def get_username(self, name):
433436
if INSTAGRAM_ID_PATTERN.match(name):
@@ -464,7 +467,7 @@ def get_user(self, name):
464467
return self.request_json(url, magic_token=True)
465468

466469
@ensure_magic_token
467-
def user_followers(self, name):
470+
def user_followers(self, name) -> Iterator[InstagramUser]:
468471
name = self.get_username(name)
469472

470473
max_id = None
@@ -491,15 +494,15 @@ def user_followers(self, name):
491494
raise InstagramAccountNoFollowError
492495

493496
for item in items:
494-
yield format_user(item)
497+
yield InstagramUser.from_payload(item)
495498

496499
max_id = data.get("next_max_id")
497500

498501
if not max_id:
499502
break
500503

501504
@ensure_magic_token
502-
def user_following(self, name):
505+
def user_following(self, name) -> Iterator[InstagramUser]:
503506
name = self.get_username(name)
504507

505508
max_id = None
@@ -526,15 +529,15 @@ def user_following(self, name):
526529
raise InstagramAccountNoFollowError
527530

528531
for item in items:
529-
yield format_user(item)
532+
yield InstagramUser.from_payload(item)
530533

531534
max_id = data.get("next_max_id")
532535

533536
if not max_id:
534537
break
535538

536539
@ensure_magic_token
537-
def user_posts(self, name):
540+
def user_posts(self, name) -> Iterator[InstagramPost]:
538541
name = self.get_username(name)
539542

540543
max_id = None
@@ -562,7 +565,7 @@ def user_posts(self, name):
562565
raise InstagramPrivateOrNonExistentAccountError
563566

564567
for item in items:
565-
yield format_post(item)
568+
yield InstagramPost.from_payload(item)
566569

567570
more_available = data.get("more_available")
568571

@@ -572,7 +575,7 @@ def user_posts(self, name):
572575
max_id = data.get("next_max_id")
573576

574577
@ensure_magic_token
575-
def user_infos(self, name):
578+
def user_infos(self, name) -> InstagramUserInfo:
576579
name = self.get_username(name)
577580

578581
data = self.get_user(name)
@@ -585,4 +588,4 @@ def user_infos(self, name):
585588
if not user:
586589
raise InstagramInvalidTargetError
587590

588-
return format_user_info(user)
591+
return InstagramUserInfo.from_payload(user)

0 commit comments

Comments
 (0)