From ff639f839a31555124d76efa19d0117dee2077ab Mon Sep 17 00:00:00 2001 From: Alexander Ott <66271487+AlexanderHott@users.noreply.github.com> Date: Fri, 20 Dec 2024 22:45:22 -0800 Subject: [PATCH 1/2] speed up nba task --- .gitignore | 2 + daily_colab_task/daily_task.py | 650 ++++++++++++++++----------------- 2 files changed, 320 insertions(+), 332 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..519542e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.venv/ +.ruff_cache/ diff --git a/daily_colab_task/daily_task.py b/daily_colab_task/daily_task.py index 888cf98..1dfbb29 100644 --- a/daily_colab_task/daily_task.py +++ b/daily_colab_task/daily_task.py @@ -34,7 +34,12 @@ # Helper function for DB connection def get_db_connection(): logging.info("connecting to db") - conn = psycopg2.connect(**conn_params) + conn = psycopg2.connect( + dbname=os.getenv("DB_NAME"), + user=os.getenv("DB_USER"), + password=os.getenv("DB_PASSWORD"), + host=os.getenv("DB_HOST"), + ) logging.info("connected to db") return conn @@ -76,123 +81,117 @@ def safe_int(value): return 0 -def update_player_game_stats(stats_dict, player_id): - if stats_dict: - with get_db_connection() as conn: - with conn.cursor() as cur: - for game_id, stats in stats_dict.items(): - if not isinstance(stats, dict): - logging.info( - f"Stats for game {game_id} is not a dictionary. Skipping." - ) - continue - - team_abv = stats.get("teamAbv", "") - team_id = stats.get("teamID", None) - - if game_id: - try: - date_str, game = game_id.split("_") - away_team, home_team = game.split("@") - game_date = datetime.strptime(date_str, "%Y%m%d").date() - - if team_abv == away_team: - opponent = home_team - home_away = "Away" - elif team_abv == home_team: - opponent = away_team - home_away = "Home" - else: - opponent = "" - home_away = "" - except ValueError as e: - logging.warning( - f"Failed to parse gameID '{game_id}' for player ID {player_id}: {e}" - ) - game_date = None - opponent = "" - home_away = "" - else: - game_date = None - opponent = "" - home_away = "" - - insert_query = """ - INSERT INTO nba_player_game_stats - (player_id, game_id, team_id, minutes_played, points, rebounds, assists, steals, blocks, turnovers, - offensive_rebounds, defensive_rebounds, free_throw_percentage, plus_minus, technical_fouls, - field_goal_attempts, three_point_fg_percentage, field_goals_made, field_goal_percentage, - three_point_fg_made, free_throw_attempts, three_point_fg_attempts, personal_fouls, - free_throws_made, fantasy_points, home_away, opponent, game_date, team_abv) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - ON CONFLICT (player_id, game_id) DO UPDATE SET - team_id = EXCLUDED.team_id, - minutes_played = EXCLUDED.minutes_played, - points = EXCLUDED.points, - rebounds = EXCLUDED.rebounds, - assists = EXCLUDED.assists, - steals = EXCLUDED.steals, - blocks = EXCLUDED.blocks, - turnovers = EXCLUDED.turnovers, - offensive_rebounds = EXCLUDED.offensive_rebounds, - defensive_rebounds = EXCLUDED.defensive_rebounds, - free_throw_percentage = EXCLUDED.free_throw_percentage, - plus_minus = EXCLUDED.plus_minus, - technical_fouls = EXCLUDED.technical_fouls, - field_goal_attempts = EXCLUDED.field_goal_attempts, - three_point_fg_percentage = EXCLUDED.three_point_fg_percentage, - field_goals_made = EXCLUDED.field_goals_made, - field_goal_percentage = EXCLUDED.field_goal_percentage, - three_point_fg_made = EXCLUDED.three_point_fg_made, - free_throw_attempts = EXCLUDED.free_throw_attempts, - three_point_fg_attempts = EXCLUDED.three_point_fg_attempts, - personal_fouls = EXCLUDED.personal_fouls, - free_throws_made = EXCLUDED.free_throws_made, - fantasy_points = EXCLUDED.fantasy_points, - home_away = EXCLUDED.home_away, - opponent = EXCLUDED.opponent, - game_date = EXCLUDED.game_date, - team_abv = EXCLUDED.team_abv - """ - - values = ( - player_id, - game_id, - team_id, - safe_float(stats.get("mins", 0)), - safe_int(stats.get("pts", 0)), - safe_int(stats.get("reb", 0)), - safe_int(stats.get("ast", 0)), - safe_int(stats.get("stl", 0)), - safe_int(stats.get("blk", 0)), - safe_int(stats.get("TOV", 0)), - safe_int(stats.get("OffReb", 0)), - safe_int(stats.get("DefReb", 0)), - safe_float(stats.get("ftp", 0.0)), - safe_float(stats.get("plusMinus", 0.0)), - safe_int(stats.get("tech", 0)), - safe_int(stats.get("fga", 0)), - safe_float(stats.get("tptfgp", 0.0)), - safe_int(stats.get("fgm", 0)), - safe_float(stats.get("fgp", 0.0)), - safe_int(stats.get("tptfgm", 0)), - safe_int(stats.get("fta", 0)), - safe_int(stats.get("tptfga", 0)), - safe_int(stats.get("PF", 0)), - safe_int(stats.get("ftm", 0)), - safe_float(stats.get("fantasyPoints", 0.0)), - home_away, - opponent, - game_date, - team_abv, - ) - - cur.execute(insert_query, values) - conn.commit() - else: - logging.info( +def update_player_game_stats(stats_dict, player_id, cur): + if not stats_dict: + logging.warning( f"No stats available for player ID {player_id}. Skipping stats update." ) + return + for game_id, stats in stats_dict.items(): + if not isinstance(stats, dict): + logging.info(f"Stats for game {game_id} is not a dictionary. Skipping.") + continue + + team_abv = stats.get("teamAbv", "") + team_id = stats.get("teamID", None) + + if game_id: + try: + date_str, game = game_id.split("_") + away_team, home_team = game.split("@") + game_date = datetime.strptime(date_str, "%Y%m%d").date() + + if team_abv == away_team: + opponent = home_team + home_away = "Away" + elif team_abv == home_team: + opponent = away_team + home_away = "Home" + else: + opponent = "" + home_away = "" + except ValueError as e: + logging.warning( + f"Failed to parse gameID '{game_id}' for player ID {player_id}: {e}" + ) + game_date = None + opponent = "" + home_away = "" + else: + game_date = None + opponent = "" + home_away = "" + + insert_query = """ + INSERT INTO nba_player_game_stats + (player_id, game_id, team_id, minutes_played, points, rebounds, assists, steals, blocks, turnovers, + offensive_rebounds, defensive_rebounds, free_throw_percentage, plus_minus, technical_fouls, + field_goal_attempts, three_point_fg_percentage, field_goals_made, field_goal_percentage, + three_point_fg_made, free_throw_attempts, three_point_fg_attempts, personal_fouls, + free_throws_made, fantasy_points, home_away, opponent, game_date, team_abv) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON CONFLICT (player_id, game_id) DO UPDATE SET + team_id = EXCLUDED.team_id, + minutes_played = EXCLUDED.minutes_played, + points = EXCLUDED.points, + rebounds = EXCLUDED.rebounds, + assists = EXCLUDED.assists, + steals = EXCLUDED.steals, + blocks = EXCLUDED.blocks, + turnovers = EXCLUDED.turnovers, + offensive_rebounds = EXCLUDED.offensive_rebounds, + defensive_rebounds = EXCLUDED.defensive_rebounds, + free_throw_percentage = EXCLUDED.free_throw_percentage, + plus_minus = EXCLUDED.plus_minus, + technical_fouls = EXCLUDED.technical_fouls, + field_goal_attempts = EXCLUDED.field_goal_attempts, + three_point_fg_percentage = EXCLUDED.three_point_fg_percentage, + field_goals_made = EXCLUDED.field_goals_made, + field_goal_percentage = EXCLUDED.field_goal_percentage, + three_point_fg_made = EXCLUDED.three_point_fg_made, + free_throw_attempts = EXCLUDED.free_throw_attempts, + three_point_fg_attempts = EXCLUDED.three_point_fg_attempts, + personal_fouls = EXCLUDED.personal_fouls, + free_throws_made = EXCLUDED.free_throws_made, + fantasy_points = EXCLUDED.fantasy_points, + home_away = EXCLUDED.home_away, + opponent = EXCLUDED.opponent, + game_date = EXCLUDED.game_date, + team_abv = EXCLUDED.team_abv + """ + + values = ( + player_id, + game_id, + team_id, + safe_float(stats.get("mins", 0)), + safe_int(stats.get("pts", 0)), + safe_int(stats.get("reb", 0)), + safe_int(stats.get("ast", 0)), + safe_int(stats.get("stl", 0)), + safe_int(stats.get("blk", 0)), + safe_int(stats.get("TOV", 0)), + safe_int(stats.get("OffReb", 0)), + safe_int(stats.get("DefReb", 0)), + safe_float(stats.get("ftp", 0.0)), + safe_float(stats.get("plusMinus", 0.0)), + safe_int(stats.get("tech", 0)), + safe_int(stats.get("fga", 0)), + safe_float(stats.get("tptfgp", 0.0)), + safe_int(stats.get("fgm", 0)), + safe_float(stats.get("fgp", 0.0)), + safe_int(stats.get("tptfgm", 0)), + safe_int(stats.get("fta", 0)), + safe_int(stats.get("tptfga", 0)), + safe_int(stats.get("PF", 0)), + safe_int(stats.get("ftm", 0)), + safe_float(stats.get("fantasyPoints", 0.0)), + home_away, + opponent, + game_date, + team_abv, + ) + cur.execute(insert_query, values) # Block 2: Fetch and update player injuries @@ -213,44 +212,40 @@ def is_injury_current(injury): return True -def update_player_injuries(injury_list): - if injury_list: - with get_db_connection() as conn: - with conn.cursor() as cur: - player_injuries = {} - for injury in injury_list: - player_id = injury["playerID"] - inj_date = injury["injDate"] - - if is_injury_current(injury): - if ( - player_id not in player_injuries - or inj_date > player_injuries[player_id]["injDate"] - ): - player_injuries[player_id] = injury - - for player_id, injury in player_injuries.items(): - cur.execute( - """ - UPDATE nba_players - SET injury = %s::jsonb - WHERE player_id = %s - """, - (json.dumps([injury]), player_id), - ) - - cur.execute( - """ - UPDATE nba_players - SET injury = NULL - WHERE player_id NOT IN %s - """, - (tuple(player_injuries.keys()) or (None,),), - ) +def update_player_injuries(injury_list, cur): + if not injury_list: + logging.warning(f"no injury data available for {injury_list}") + return + player_injuries = {} + for injury in injury_list: + player_id = injury["playerID"] + inj_date = injury["injDate"] + + if is_injury_current(injury): + if ( + player_id not in player_injuries + or inj_date > player_injuries[player_id]["injDate"] + ): + player_injuries[player_id] = injury + + for player_id, injury in player_injuries.items(): + cur.execute( + """ + UPDATE nba_players + SET injury = %s::jsonb + WHERE player_id = %s + """, + (json.dumps([injury]), player_id), + ) - conn.commit() - else: - print("No injury data available") + cur.execute( + """ + UPDATE nba_players + SET injury = NULL + WHERE player_id NOT IN %s + """, + (tuple(player_injuries.keys()) or (None,),), + ) # Block 3: Fetch and update player information and season stats @@ -283,104 +278,99 @@ def fetch_player_info(first_name): return None -def update_player_info(player_data): - if "nbaComHeadshot" in player_data and player_data["nbaComHeadshot"]: - with get_db_connection() as conn: - with conn.cursor() as cur: - cur.execute( - """ - UPDATE nba_players - SET player_pic = %s - WHERE name = %s - """, - (player_data["nbaComHeadshot"], player_data["longName"]), - ) - conn.commit() +def update_player_info(player_data, cur): + if not player_data.get("nbaComHeadshot", ""): + logging.warning(f"No player info for {player_data}") + return + cur.execute( + """ + UPDATE nba_players + SET player_pic = %s + WHERE name = %s + """, + (player_data["nbaComHeadshot"], player_data["longName"]), + ) -def update_player_season_stats(player_data): +def update_player_season_stats(player_data, cur): stats = player_data.get("stats") - if stats: - with get_db_connection() as conn: - with conn.cursor() as cur: - cur.execute("SELECT id FROM nba_seasons WHERE season_year = '2025'") - season_id_result = cur.fetchone() - season_id = season_id_result[0] if season_id_result else 2 - - update_query = sql.SQL(""" - INSERT INTO nba_player_season_stats - (player_id, season_id, games_played, points_per_game, rebounds_per_game, - assists_per_game, steals_per_game, blocks_per_game, turnovers_per_game, - field_goal_percentage, three_point_percentage, free_throw_percentage, - minutes_per_game, offensive_rebounds_per_game, defensive_rebounds_per_game, - field_goals_made_per_game, field_goals_attempted_per_game, - three_pointers_made_per_game, three_pointers_attempted_per_game, - free_throws_made_per_game, free_throws_attempted_per_game) - VALUES ( - (SELECT id FROM nba_players WHERE name = %s), - %s, - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s - ) - ON CONFLICT (player_id, season_id) DO UPDATE - SET games_played = EXCLUDED.games_played, - points_per_game = EXCLUDED.points_per_game, - rebounds_per_game = EXCLUDED.rebounds_per_game, - assists_per_game = EXCLUDED.assists_per_game, - steals_per_game = EXCLUDED.steals_per_game, - blocks_per_game = EXCLUDED.blocks_per_game, - turnovers_per_game = EXCLUDED.turnovers_per_game, - field_goal_percentage = EXCLUDED.field_goal_percentage, - three_point_percentage = EXCLUDED.three_point_percentage, - free_throw_percentage = EXCLUDED.free_throw_percentage, - minutes_per_game = EXCLUDED.minutes_per_game, - offensive_rebounds_per_game = EXCLUDED.offensive_rebounds_per_game, - defensive_rebounds_per_game = EXCLUDED.defensive_rebounds_per_game, - field_goals_made_per_game = EXCLUDED.field_goals_made_per_game, - field_goals_attempted_per_game = EXCLUDED.field_goals_attempted_per_game, - three_pointers_made_per_game = EXCLUDED.three_pointers_made_per_game, - three_pointers_attempted_per_game = EXCLUDED.three_pointers_attempted_per_game, - free_throws_made_per_game = EXCLUDED.free_throws_made_per_game, - free_throws_attempted_per_game = EXCLUDED.free_throws_attempted_per_game - """) - cur.execute( - update_query, - ( - player_data["longName"], - season_id, - stats.get("gamesPlayed", 0), - stats.get("pts", 0.0), - stats.get("reb", 0.0), - stats.get("ast", 0.0), - stats.get("stl", 0.0), - stats.get("blk", 0.0), - stats.get("TOV", 0.0), - stats.get("fgp", 0.0), - stats.get("tptfgp", 0.0), - stats.get("ftp", 0.0), - stats.get("mins", 0.0), - stats.get("OffReb", 0.0), - stats.get("DefReb", 0.0), - stats.get("fgm", 0.0), - stats.get("fga", 0.0), - stats.get("tptfgm", 0.0), - stats.get("tptfga", 0.0), - stats.get("ftm", 0.0), - stats.get("fta", 0.0), - ), - ) - conn.commit() - else: - print( + if not stats: + logging.warning( f"No stats available for {player_data['longName']}. Skipping stats update." ) + return + + cur.execute("SELECT id FROM nba_seasons WHERE season_year = '2025'") + season_id_result = cur.fetchone() + season_id = season_id_result[0] if season_id_result else 2 + + update_query = sql.SQL(""" + INSERT INTO nba_player_season_stats + (player_id, season_id, games_played, points_per_game, rebounds_per_game, + assists_per_game, steals_per_game, blocks_per_game, turnovers_per_game, + field_goal_percentage, three_point_percentage, free_throw_percentage, + minutes_per_game, offensive_rebounds_per_game, defensive_rebounds_per_game, + field_goals_made_per_game, field_goals_attempted_per_game, + three_pointers_made_per_game, three_pointers_attempted_per_game, + free_throws_made_per_game, free_throws_attempted_per_game) + VALUES ( + (SELECT id FROM nba_players WHERE name = %s), + %s, + %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s + ) + ON CONFLICT (player_id, season_id) DO UPDATE + SET games_played = EXCLUDED.games_played, + points_per_game = EXCLUDED.points_per_game, + rebounds_per_game = EXCLUDED.rebounds_per_game, + assists_per_game = EXCLUDED.assists_per_game, + steals_per_game = EXCLUDED.steals_per_game, + blocks_per_game = EXCLUDED.blocks_per_game, + turnovers_per_game = EXCLUDED.turnovers_per_game, + field_goal_percentage = EXCLUDED.field_goal_percentage, + three_point_percentage = EXCLUDED.three_point_percentage, + free_throw_percentage = EXCLUDED.free_throw_percentage, + minutes_per_game = EXCLUDED.minutes_per_game, + offensive_rebounds_per_game = EXCLUDED.offensive_rebounds_per_game, + defensive_rebounds_per_game = EXCLUDED.defensive_rebounds_per_game, + field_goals_made_per_game = EXCLUDED.field_goals_made_per_game, + field_goals_attempted_per_game = EXCLUDED.field_goals_attempted_per_game, + three_pointers_made_per_game = EXCLUDED.three_pointers_made_per_game, + three_pointers_attempted_per_game = EXCLUDED.three_pointers_attempted_per_game, + free_throws_made_per_game = EXCLUDED.free_throws_made_per_game, + free_throws_attempted_per_game = EXCLUDED.free_throws_attempted_per_game + """) + cur.execute( + update_query, + ( + player_data["longName"], + season_id, + stats.get("gamesPlayed", 0), + stats.get("pts", 0.0), + stats.get("reb", 0.0), + stats.get("ast", 0.0), + stats.get("stl", 0.0), + stats.get("blk", 0.0), + stats.get("TOV", 0.0), + stats.get("fgp", 0.0), + stats.get("tptfgp", 0.0), + stats.get("ftp", 0.0), + stats.get("mins", 0.0), + stats.get("OffReb", 0.0), + stats.get("DefReb", 0.0), + stats.get("fgm", 0.0), + stats.get("fga", 0.0), + stats.get("tptfgm", 0.0), + stats.get("tptfga", 0.0), + stats.get("ftm", 0.0), + stats.get("fta", 0.0), + ), + ) # Block 4: Fetch and update team stats -def fetch_team_names(): - with get_db_connection() as conn: - with conn.cursor() as cur: - cur.execute("SELECT name FROM nba_teams;") - team_names = [name[0] for name in cur.fetchall()] +def fetch_team_names(cur): + cur.execute("SELECT name FROM nba_teams;") + team_names = [name[0] for name in cur.fetchall()] return team_names @@ -394,58 +384,41 @@ def fetch_team_data(): return None -def update_team_stats(team_name, team_data): - with get_db_connection() as conn: - with conn.cursor() as cur: - team_ppg = team_data.get("ppg", None) - team_oppg = team_data.get("oppg", None) - team_wins = team_data.get("wins", None) - team_losses = team_data.get("loss", None) - team_bpg = ( - team_data.get("defensiveStats", {}).get("blk", {}).get("Total", None) - ) - team_spg = ( - team_data.get("defensiveStats", {}).get("stl", {}).get("Total", None) - ) - team_apg = ( - team_data.get("offensiveStats", {}).get("ast", {}).get("Total", None) - ) - team_fga = ( - team_data.get("offensiveStats", {}).get("fga", {}).get("Total", None) - ) - team_fgm = ( - team_data.get("offensiveStats", {}).get("fgm", {}).get("Total", None) - ) - team_fta = ( - team_data.get("offensiveStats", {}).get("fta", {}).get("Total", None) - ) - team_tov = ( - team_data.get("defensiveStats", {}).get("TOV", {}).get("Total", None) - ) - - cur.execute( - """ - UPDATE nba_teams - SET ppg = %s, oppg = %s, wins = %s, loss = %s, team_bpg = %s, team_spg = %s, team_apg = %s, - team_fga = %s, team_fgm = %s, team_fta = %s, team_tov = %s - WHERE LOWER(name) = LOWER(%s); - """, - ( - team_ppg, - team_oppg, - team_wins, - team_losses, - team_bpg, - team_spg, - team_apg, - team_fga, - team_fgm, - team_fta, - team_tov, - team_name, - ), - ) - conn.commit() +def update_team_stats(team_name, team_data, cur): + team_ppg = team_data.get("ppg", None) + team_oppg = team_data.get("oppg", None) + team_wins = team_data.get("wins", None) + team_losses = team_data.get("loss", None) + team_bpg = team_data.get("defensiveStats", {}).get("blk", {}).get("Total", None) + team_spg = team_data.get("defensiveStats", {}).get("stl", {}).get("Total", None) + team_apg = team_data.get("offensiveStats", {}).get("ast", {}).get("Total", None) + team_fga = team_data.get("offensiveStats", {}).get("fga", {}).get("Total", None) + team_fgm = team_data.get("offensiveStats", {}).get("fgm", {}).get("Total", None) + team_fta = team_data.get("offensiveStats", {}).get("fta", {}).get("Total", None) + team_tov = team_data.get("defensiveStats", {}).get("TOV", {}).get("Total", None) + + cur.execute( + """ + UPDATE nba_teams + SET ppg = %s, oppg = %s, wins = %s, loss = %s, team_bpg = %s, team_spg = %s, team_apg = %s, + team_fga = %s, team_fgm = %s, team_fta = %s, team_tov = %s + WHERE LOWER(name) = LOWER(%s); + """, + ( + team_ppg, + team_oppg, + team_wins, + team_losses, + team_bpg, + team_spg, + team_apg, + team_fga, + team_fgm, + team_fta, + team_tov, + team_name, + ), + ) # Main function to run each block in sequence @@ -460,10 +433,12 @@ def main(): logging.info(f"[1] fetching {player_id=} {season_year=}") stats_dict = fetch_player_game_stats(player_id, season_year) logging.info("[1] updating db") - update_player_game_stats(stats_dict, player_id) - time.sleep(0.05) + + with get_db_connection() as conn, conn.cursor() as cur: + update_player_game_stats(stats_dict, player_id, cur) + conn.commit() except Exception as e: - logging.info(f"[1] error: \n{format_exception(e)}") + logging.error(f"[1] error: \n{format_exception(e)}") try: # Block 2: Fetch and update player injuries @@ -471,11 +446,14 @@ def main(): injury_list = fetch_injury_list() if injury_list: logging.info(f"[2] updating {len(injury_list)} player injuries in db") - update_player_injuries(injury_list) + + with get_db_connection() as conn, conn.cursor() as cur: + update_player_injuries(injury_list, cur) + conn.commit() else: logging.info("[2] no injury data") except Exception as e: - logging.info(f"[2] error:\n{format_exception(e)}") + logging.error(f"[2] error:\n{format_exception(e)}") try: # Block 3: Fetch and update player info and season stats @@ -486,48 +464,56 @@ def main(): for first_name, full_names in grouped_names.items(): logging.info(f"[3] info for players with first name: {first_name}") players_data = fetch_player_info(first_name) - if players_data: - logging.info(f"[3] updating {len(players_data)} players") - for player_data in players_data: - api_full_name = player_data["longName"].strip() - if api_full_name.lower() in [name.lower() for name in full_names]: - update_player_info(player_data) - update_player_season_stats(player_data) - logging.info(f"[3] Processed {api_full_name}") - else: - logging.info( - f"[3] Player {api_full_name} not found in database for first name {first_name}" - ) - else: + if not players_data: logging.warning( f"[3] Failed to fetch info for players with first name: {first_name}" ) - time.sleep(0.05) + continue + logging.info(f"[3] updating {len(players_data)} players") + for player_data in players_data: + api_full_name = player_data["longName"].strip() + if api_full_name.lower() not in [name.lower() for name in full_names]: + logging.info( + f"[3] Player {api_full_name} not found in database for first name {first_name}" + ) + continue + + with get_db_connection() as conn, conn.cursor() as cur: + update_player_info(player_data, cur) + update_player_season_stats(player_data, cur) + conn.commit() + logging.info(f"[3] Processed {api_full_name}") except Exception as e: logging.error(f"[3] error:\n{format_exception(e)}") try: # Block 4: Fetch and update team stats logging.info("[4] fetching team stats") - team_names = fetch_team_names() + + with get_db_connection() as conn, conn.cursor() as cur: + team_names = fetch_team_names(cur) + conn.commit() teams_data = fetch_team_data() - if teams_data: - logging.info(f"[4] got {len(teams_data)} teams") - for team_name in team_names: - team_data = next( - ( - team - for team in teams_data - if team["teamName"].lower() == team_name.lower() - ), - None, - ) - if team_data: - update_team_stats(team_name, team_data) - else: - print(f"Skipping update for {team_name} (not found in API)") - else: - logging.warning("[4] no team data available") + + with get_db_connection() as conn, conn.cursor() as cur: + if teams_data: + logging.info(f"[4] got {len(teams_data)} teams") + for team_name in team_names: + team_data = next( + ( + team + for team in teams_data + if team["teamName"].lower() == team_name.lower() + ), + None, + ) + if team_data: + update_team_stats(team_name, team_data, cur) + else: + print(f"Skipping update for {team_name} (not found in API)") + else: + logging.warning("[4] no team data available") + conn.commit() except Exception as e: logging.error(f"[4] error:\n{format_exception(e)}") From 9b0df968c56cccd8e37b66e67632bbe480885a8d Mon Sep 17 00:00:00 2001 From: Alexander Ott <66271487+AlexanderHott@users.noreply.github.com> Date: Mon, 30 Dec 2024 15:39:30 -0800 Subject: [PATCH 2/2] start move to async --- .gitignore | 1 + daily_colab_task/daily_task.py | 42 ++++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 519542e..ad1b839 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .venv/ .ruff_cache/ +.env diff --git a/daily_colab_task/daily_task.py b/daily_colab_task/daily_task.py index aa45067..903d188 100644 --- a/daily_colab_task/daily_task.py +++ b/daily_colab_task/daily_task.py @@ -1,7 +1,6 @@ import json import logging import os -import time from collections import defaultdict from datetime import date, datetime from traceback import format_exception @@ -10,6 +9,13 @@ import requests from psycopg2 import sql +try: + from dotenv import load_dotenv + + load_dotenv() +except ImportError: + pass + logging.basicConfig( level=logging.INFO, @@ -429,14 +435,14 @@ def main(): player_ids = fetch_player_ids() logging.info(f"[1] got {len(player_ids)} player stats") season_year = 2024 - for (player_id,) in player_ids: - logging.info(f"[1] fetching {player_id=} {season_year=}") - stats_dict = fetch_player_game_stats(player_id, season_year) - logging.info("[1] updating db") + with get_db_connection() as conn, conn.cursor() as cur: + for (player_id,) in player_ids: + logging.info(f"[1] fetching {player_id=} {season_year=}") + stats_dict = fetch_player_game_stats(player_id, season_year) + logging.info("[1] updating db") - with get_db_connection() as conn, conn.cursor() as cur: update_player_game_stats(stats_dict, player_id, cur) - conn.commit() + conn.commit() except Exception as e: logging.error(f"[1] error: \n{format_exception(e)}") @@ -470,19 +476,21 @@ def main(): ) continue logging.info(f"[3] updating {len(players_data)} players") - for player_data in players_data: - api_full_name = player_data["longName"].strip() - if api_full_name.lower() not in [name.lower() for name in full_names]: - logging.info( - f"[3] Player {api_full_name} not found in database for first name {first_name}" - ) - continue + with get_db_connection() as conn, conn.cursor() as cur: + for player_data in players_data: + api_full_name = player_data["longName"].strip() + if api_full_name.lower() not in [ + name.lower() for name in full_names + ]: + logging.info( + f"[3] Player {api_full_name} not found in database for first name {first_name}" + ) + continue - with get_db_connection() as conn, conn.cursor() as cur: update_player_info(player_data, cur) update_player_season_stats(player_data, cur) - conn.commit() - logging.info(f"[3] Processed {api_full_name}") + logging.info(f"[3] Processed {api_full_name}") + conn.commit() except Exception as e: logging.error(f"[3] error:\n{format_exception(e)}")