@@ -34,6 +34,16 @@ def turn_into_df(data: dict) -> pd.DataFrame:
34
34
35
35
36
36
def fetch_twitch_data (twitch : Twitch , fn_name : str , ** kwargs ) -> pd .DataFrame :
37
+ """ fetch data from Twitch API
38
+
39
+ Args:
40
+ twitch (Twitch): twitchAPI object
41
+ fn_name (str): function name of twitchAPI
42
+ **kwargs: arguments for fn_name
43
+
44
+ Returns:
45
+ pd.DataFrame: fetched data
46
+ """
37
47
n = kwargs ['first' ]
38
48
fn = getattr (twitch , fn_name )
39
49
@@ -44,8 +54,10 @@ def fetch_twitch_data(twitch: Twitch, fn_name: str, **kwargs) -> pd.DataFrame:
44
54
while (n > 0 ):
45
55
kwargs ['first' ] = min (100 , n )
46
56
n -= kwargs ['first' ]
57
+ # check if there are more pages
58
+ if not data_all ['pagination' ]:
59
+ break
47
60
kwargs ['after' ] = data_all ['pagination' ]['cursor' ]
48
-
49
61
data_all = fn (** kwargs )
50
62
data = pd .concat ([data , turn_into_df (data_all )])
51
63
@@ -67,31 +79,47 @@ def fetch_top_games(twitch: Twitch, n: int = 100) -> pd.DataFrame:
67
79
return top_games
68
80
69
81
70
def fetch_game_streams(twitch: Twitch,
                       game_id: str,
                       n: int = 100) -> pd.DataFrame:
    """ fetch game streams data from Twitch API and attach streamer info

    Streams are fetched through ``fetch_twitch_data`` and the ``user_id``
    of every stream is then looked up with ``twitch.get_users`` in batches.

    Args:
        twitch (Twitch): twitch api instance
        game_id (str): id of the game whose streams are fetched
        n (int): how many streams to fetch

    Returns:
        pd.DataFrame / None: the stream rows with the ``broadcaster_type``,
            ``description`` and ``type`` user columns appended, or None
            when the stream data has no usable ``user_id`` column
    """
    user_columns = ['broadcaster_type', 'description', 'type']
    # Twitch "Get Users" accepts at most 100 ids per request.
    batch_size = 100

    game_streams = fetch_twitch_data(
        twitch, 'get_streams', first=n, game_id=[game_id])

    # get user id to dig more data
    try:
        total_user_ids = game_streams['user_id'].tolist()
    except (KeyError, TypeError, AttributeError):
        # Missing column, or fetch_twitch_data returned something that is
        # not a usable frame: skip this game instead of aborting the run.
        # NOTE(review): this prints the literal label, not the frame itself
        # -- confirm whether dumping `game_streams` was intended.
        print('game_streams')
        cprint('Error: ' + game_id + ' data broken. Jump over it.', 'red')
        return None

    # Collect one frame per batch and concatenate once at the end;
    # DataFrame.append no longer exists in pandas >= 2.0.
    user_frames = []
    for start in range(0, len(total_user_ids), batch_size):
        user_ids = total_user_ids[start:start + batch_size]
        users_data = turn_into_df(twitch.get_users(user_ids=user_ids))
        # select needed columns
        user_frames.append(users_data[user_columns])

    if user_frames:
        total_users_data = pd.concat(user_frames, ignore_index=True)
    else:
        # No streams at all: still expose the user columns, just empty.
        total_users_data = pd.DataFrame(columns=user_columns)

    # Both frames need a clean 0..k-1 index because they are joined
    # side by side purely by row position.
    # NOTE(review): this assumes get_users returns exactly one row per
    # requested id, in request order -- confirm, otherwise join on the id.
    game_streams = game_streams.reset_index(drop=True)
    game_streams = pd.concat([game_streams, total_users_data], axis=1)
    return game_streams
96
124
97
125
@@ -151,4 +179,3 @@ def fetch_game_info(df: pd.DataFrame) -> pd.DataFrame:
151
179
152
180
df = df .assign (** data_dict )
153
181
return df
154
-
0 commit comments