Skip to content

Commit 4e8e00d

Browse files
authored
Merge pull request #1 from 3box/add-query-for-update-significant
add update significant apps
2 parents 56ecac9 + bd9ec19 commit 4e8e00d

File tree

3 files changed

+104
-1
lines changed

3 files changed

+104
-1
lines changed

Diff for: get-clones/clones-lambda.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def handler():
4646
response = requests.get(
4747
'https://api.github.com/repos/ceramicstudio/{}/traffic/clones'.format(repo),
4848
headers=headers
49-
)
49+
)
5050
data = response.json()
5151
for clone_data in data.get('clones', {}):
5252
day_str = datetime.strptime(clone_data['timestamp'], "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d")

Diff for: get-clones/update-significant.py

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import gzip
2+
import requests
3+
import json
4+
import os
5+
import re
6+
import psycopg2.extras
7+
from base64 import b64decode
8+
from datetime import datetime, timezone, timedelta
9+
10+
11+
# Database connection parameters
12+
DB_HOST = os.environ['DB_HOST']
13+
DB_PORT = '5432'
14+
DB_USER = 'tsuser'
15+
DB_PASSWORD = os.environ['DB_PASSWORD']
16+
DB_NAME = 'tsdb'
17+
18+
GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
19+
20+
# Set up the headers with the authorization token
21+
headers = {
22+
'Authorization': f'token {GITHUB_TOKEN}'
23+
}
24+
25+
log_pattern = re.compile(r'received: (.*})')
26+
27+
def unix_to_datetime(unix_timestamp):
    """Convert a Unix epoch timestamp (seconds) to a timezone-aware UTC datetime."""
    dt_utc = datetime.fromtimestamp(unix_timestamp, tz=timezone.utc)
    return dt_utc
29+
30+
def fetch_data(query, cur):
    """Execute *query* on the open DB cursor *cur* and return every result row."""
    cur.execute(query)
    rows = cur.fetchall()
    return rows
33+
34+
def compare_and_update(last_week_data, prev_week_data, week, cur):
    """Diff two weekly (source, count) result sets.

    Returns a pair of row-tuple lists ready for insertion:
    - new_significant: sources present only in the latest week, as
      (source, week, count);
    - missing_significant: sources present the previous week but absent
      from the latest, as (source, week, previous_count, 0).

    The *cur* argument is not used here; it is kept for call-site
    compatibility with handler().
    """
    latest = dict(last_week_data)
    previous = dict(prev_week_data)

    appeared = [
        (src, week, cnt)
        for src, cnt in latest.items()
        if src not in previous
    ]
    # latest.get(src, 0) is always 0 here because src is filtered to be
    # absent from the latest week.
    disappeared = [
        (src, week, previous[src], latest.get(src, 0))
        for src in previous
        if src not in latest
    ]

    return appeared, disappeared
46+
47+
def update_tables(new_significant, missing_significant, cur):
    """Rebuild the new_significant and missing_significant report tables.

    Each table is dropped, recreated, and bulk-loaded with the supplied
    row tuples. The caller owns the transaction and must commit.
    """
    table_specs = [
        (
            "DROP TABLE IF EXISTS new_significant;",
            "CREATE TABLE new_significant (source text, week_ending DATE, unique_write_count bigint);",
            "INSERT INTO new_significant (source, week_ending, unique_write_count) VALUES (%s, %s, %s);",
            new_significant,
        ),
        (
            "DROP TABLE IF EXISTS missing_significant;",
            "CREATE TABLE missing_significant (source text, week_ending DATE, previous_count bigint, new_count bigint);",
            "INSERT INTO missing_significant (source, week_ending, previous_count, new_count) VALUES (%s, %s, %s, %s);",
            missing_significant,
        ),
    ]
    for drop_sql, create_sql, insert_sql, rows in table_specs:
        cur.execute(drop_sql)
        cur.execute(create_sql)
        # Batched insert keeps round-trips low for larger row sets.
        psycopg2.extras.execute_batch(cur, insert_sql, rows)
57+
58+
59+
def handler():
    """Refresh the significant-apps report tables in TimescaleDB.

    Fetches per-source unique write counts (threshold > 1000) for the two
    most recent full weeks from top_sources_7d, diffs them with
    compare_and_update(), and rewrites the new_significant and
    missing_significant tables via update_tables().

    Connection parameters come from module-level env-derived constants.
    The connection and cursor are always closed, even if a query raises.
    """
    # BUGFIX: original message was missing the separator space.
    print("Connecting to db host " + DB_HOST)
    conn = psycopg2.connect(
        host=DB_HOST,
        port=DB_PORT,
        user=DB_USER,
        password=DB_PASSWORD,
        dbname=DB_NAME
    )
    try:
        cur = conn.cursor()
        try:
            # Sources above the significance threshold for the most recent
            # complete week.
            query_last_week = """
                SELECT source, unique_write_count
                FROM top_sources_7d
                WHERE unique_write_count > 1000
                AND week = date_trunc('week', current_date) - interval '1 week';
            """

            # Same threshold, one week earlier, for comparison.
            query_prev_week = """
                SELECT source, unique_write_count
                FROM top_sources_7d
                WHERE unique_write_count > 1000
                AND week = date_trunc('week', current_date) - interval '2 weeks';
            """

            last_week_data = fetch_data(query_last_week, cur)
            prev_week_data = fetch_data(query_prev_week, cur)

            # BUGFIX: call now() exactly once so the week label cannot become
            # inconsistent if the two original calls straddled a Monday midnight.
            # Label is the Monday of the current week (the "week_ending" of the
            # most recent complete week in the tables above).
            now = datetime.now()
            week = (now - timedelta(days=now.weekday())).strftime('%Y-%m-%d')

            new_significant, missing_significant = compare_and_update(last_week_data, prev_week_data, week, cur)

            update_tables(new_significant, missing_significant, cur)

            conn.commit()
        finally:
            # BUGFIX: cursor was leaked if any query raised.
            cur.close()
    finally:
        # BUGFIX: connection was leaked if any query raised.
        conn.close()

    print("Done, check the db")

handler()

Diff for: logs-to-tsdb/logs-lambda.py

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ def handler(event, context):
5454
# Insert logs into TimescaleDB
5555
insert_query = "INSERT INTO cas_log_data (timestamp, cid, did, model, family, stream, origin, cacao, cap_cid) VALUES %s"
5656

57+
print("Inserting {} rows including {}".format(len(batched), batched[0]))
58+
5759
psycopg2.extras.execute_values(cursor, insert_query, batched)
5860

5961
conn.commit()

0 commit comments

Comments
 (0)