Skip to content

Commit

Permalink
Finishing porting ct submodule to tabular records
Browse files: browse the repository at this point in the history
Fix #592
Fix #281
  • Loading branch information
Yomguithereal committed Dec 14, 2023
1 parent 3963ffb commit ddcc8f8
Show file tree
Hide file tree
Showing 15 changed files with 157 additions and 7,812 deletions.
7,549 changes: 0 additions & 7,549 deletions docs/cli.md

Large diffs are not rendered by default.

9 changes: 1 addition & 8 deletions minet/cli/crowdtangle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#
from casanova import RowCountResumer, LastCellResumer

from minet.cli.argparse import BooleanAction, SplitterType, CSVFileType
from minet.cli.argparse import SplitterType, CSVFileType

# TODO: lazyloading issue
from minet.crowdtangle.constants import (
Expand Down Expand Up @@ -42,13 +42,6 @@
$ minet ct leaderboard --token YOUR_TOKEN > accounts-stats.csv
""",
arguments=[
{
"flags": ["--breakdown", "--no-breakdown"],
"help": "Whether to skip statistics breakdown by post type in the CSV output.",
"dest": "breakdown",
"action": BooleanAction,
"default": True,
},
FORMAT_ARGUMENT,
{
"flags": ["-l", "--limit"],
Expand Down
16 changes: 4 additions & 12 deletions minet/cli/crowdtangle/leaderboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,12 @@
#
# Logic of the `ct leaderboard` action.
#
from minet.crowdtangle.constants import (
CROWDTANGLE_LEADERBOARD_CSV_HEADERS,
CROWDTANGLE_LEADERBOARD_CSV_HEADERS_WITH_BREAKDOWN,
)
from minet.crowdtangle.types import CrowdTangleLeaderboard
from minet.cli.crowdtangle.utils import make_paginated_action


def select_csv_headers(cli_args):
if cli_args.breakdown:
return CROWDTANGLE_LEADERBOARD_CSV_HEADERS_WITH_BREAKDOWN

return CROWDTANGLE_LEADERBOARD_CSV_HEADERS


action = make_paginated_action(
method_name="leaderboard", item_name="accounts", csv_headers=select_csv_headers
method_name="leaderboard",
item_name="accounts",
csv_headers=CrowdTangleLeaderboard.fieldnames(),
)
4 changes: 2 additions & 2 deletions minet/cli/crowdtangle/posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
#
# Logic of the `ct posts` action.
#
from minet.crowdtangle.constants import CROWDTANGLE_POST_CSV_HEADERS
from minet.crowdtangle.types import CrowdTanglePost
from minet.cli.crowdtangle.utils import make_paginated_action

action = make_paginated_action(
method_name="posts", item_name="posts", csv_headers=CROWDTANGLE_POST_CSV_HEADERS
method_name="posts", item_name="posts", csv_headers=CrowdTanglePost.fieldnames()
)
6 changes: 3 additions & 3 deletions minet/cli/crowdtangle/posts_by_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
import minet.facebook as facebook
from minet.cli.utils import with_enricher_and_loading_bar
from minet.cli.crowdtangle.utils import with_crowdtangle_utilities
from minet.crowdtangle.constants import CROWDTANGLE_POST_CSV_HEADERS
from minet.crowdtangle.types import CrowdTanglePost
from minet.crowdtangle.exceptions import (
CrowdTanglePostNotFound,
)


@with_crowdtangle_utilities
@with_enricher_and_loading_bar(
headers=CROWDTANGLE_POST_CSV_HEADERS, title="Retrieving posts", unit="posts"
headers=CrowdTanglePost, title="Retrieving posts", unit="posts"
)
def action(cli_args, client, enricher, loading_bar):
for row, url in enricher.cells(cli_args.column, with_rows=True):
Expand All @@ -45,7 +45,7 @@ def action(cli_args, client, enricher, loading_bar):
post = client.post(post_id)

if post is not None:
enricher.writerow(row, post.as_csv_row())
enricher.writerow(row, post)
else:
enricher.writerow(row)
except CrowdTanglePostNotFound as error:
Expand Down
4 changes: 2 additions & 2 deletions minet/cli/crowdtangle/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
#
# Logic of the `ct search` action.
#
from minet.crowdtangle.constants import CROWDTANGLE_POST_CSV_HEADERS
from minet.crowdtangle.types import CrowdTanglePost
from minet.cli.crowdtangle.utils import make_paginated_action

action = make_paginated_action(
method_name="search",
item_name="posts",
csv_headers=CROWDTANGLE_POST_CSV_HEADERS,
csv_headers=CrowdTanglePost.fieldnames(),
get_args=lambda cli_args: [cli_args.terms],
announce=lambda cli_args: 'Searching for: "%s"' % cli_args.terms,
)
9 changes: 3 additions & 6 deletions minet/cli/crowdtangle/summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,21 @@

from minet.cli.utils import with_enricher_and_loading_bar
from minet.cli.crowdtangle.utils import with_crowdtangle_utilities
from minet.crowdtangle.constants import (
CROWDTANGLE_SUMMARY_CSV_HEADERS,
CROWDTANGLE_POST_CSV_HEADERS,
)
from minet.crowdtangle.types import CrowdTanglePost, CrowdTangleSummary

# TODO: could be a nested loading bar


@with_crowdtangle_utilities
@with_enricher_and_loading_bar(
headers=CROWDTANGLE_SUMMARY_CSV_HEADERS, title="Collecting data", unit="urls"
headers=CrowdTangleSummary, title="Collecting data", unit="urls"
)
def action(cli_args, client, enricher, loading_bar):
posts_writer = None

if cli_args.posts is not None:
posts_writer = casanova.writer(
cli_args.posts, fieldnames=["url"] + CROWDTANGLE_POST_CSV_HEADERS
cli_args.posts, fieldnames=["url"] + CrowdTanglePost.fieldnames()
)

for row, url in enricher.cells(cli_args.column, with_rows=True):
Expand Down
73 changes: 1 addition & 72 deletions minet/crowdtangle/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

CROWDTANGLE_DEFAULT_RATE_LIMIT = 6 # Number of hits per minute
CROWDTANGLE_LINKS_DEFAULT_RATE_LIMIT = 2
CROWDTANGLE_DEFAULT_START_DATE = "2010"

CROWDTANGLE_DEFAULT_TIMEOUT = Timeout(connect=10, read=60 * 5)

Expand Down Expand Up @@ -94,75 +95,3 @@
"thankful",
"wow",
]

CROWDTANGLE_POST_CSV_HEADERS = [
"ct_id",
"id",
"platform",
"type",
"title",
"caption",
"message",
"description",
"date",
"datetime",
"updated",
"link",
"post_url",
"score",
"video_length_ms",
"live_video_status",
]

for name in CROWDTANGLE_STATISTICS:
CROWDTANGLE_POST_CSV_HEADERS.append("actual_%s_count" % name)
CROWDTANGLE_POST_CSV_HEADERS.append("expected_%s_count" % name)

CROWDTANGLE_ACCOUNT_CSV_HEADERS = [
"account_ct_id",
"account_id",
"account_platform",
"account_name",
"account_handle",
"account_profile_image",
"account_subscriber_count",
"account_url",
"account_verified",
"account_type",
"account_page_admin_top_country",
]

CROWDTANGLE_MEDIA_CSV_HEADERS = ["links", "expanded_links", "media"]

CROWDTANGLE_POST_CSV_HEADERS += CROWDTANGLE_ACCOUNT_CSV_HEADERS
CROWDTANGLE_POST_CSV_HEADERS += CROWDTANGLE_MEDIA_CSV_HEADERS

CROWDTANGLE_SUMMARY_CSV_HEADERS = ["%s_count" % t for t in CROWDTANGLE_REACTION_TYPES]

CROWDTANGLE_LEADERBOARD_CSV_HEADERS = [
"ct_id",
"name",
"handle",
"profile_image",
"subscriber_count",
"url",
"verified",
"initial_subscriber_count",
"final_subscriber_count",
"subscriber_data_notes",
]

for _, substitute_key in CROWDTANGLE_FULL_STATISTICS:
CROWDTANGLE_LEADERBOARD_CSV_HEADERS.append(substitute_key)

CROWDTANGLE_LEADERBOARD_CSV_HEADERS_WITH_BREAKDOWN = list(
CROWDTANGLE_LEADERBOARD_CSV_HEADERS
)

for post_type in CROWDTANGLE_POST_TYPES:
for _, substitute_key in CROWDTANGLE_FULL_STATISTICS:
CROWDTANGLE_LEADERBOARD_CSV_HEADERS_WITH_BREAKDOWN.append(
"%s_%s" % (post_type, substitute_key)
)

CROWDTANGLE_DEFAULT_START_DATE = "2010"
140 changes: 0 additions & 140 deletions minet/crowdtangle/formatters.py

This file was deleted.

4 changes: 2 additions & 2 deletions minet/crowdtangle/leaderboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Function related to leaderboards
#
from minet.crowdtangle.utils import make_paginated_iterator
from minet.crowdtangle.formatters import format_leaderboard
from minet.crowdtangle.types import CrowdTangleLeaderboard

URL_TEMPLATE = "https://api.crowdtangle.com/leaderboard?count=100&token=%s"

Expand All @@ -26,5 +26,5 @@ def url_forge(token=None, list_id=None, start_date=None, **kwargs):
url_forge,
item_key="accountStatistics",
item_id_getter=lambda x: x["account"]["id"],
formatter=format_leaderboard,
formatter=CrowdTangleLeaderboard.from_payload,
)
4 changes: 2 additions & 2 deletions minet/crowdtangle/post.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ebbe import getpath

from minet.crowdtangle.exceptions import CrowdTangleMissingTokenError
from minet.crowdtangle.formatters import format_post
from minet.crowdtangle.types import CrowdTanglePost

URL_TEMPLATE = "https://api.crowdtangle.com/post/%s?token=%s"

Expand All @@ -31,6 +31,6 @@ def crowdtangle_post(request, post_id, token=None, raw=False):
return

if not raw:
return format_post(post)
return CrowdTanglePost.from_payload(post)

return post
Loading

0 comments on commit ddcc8f8

Please sign in to comment.