@@ -149,10 +149,24 @@ def __init__(self, config, task=None):
149
149
rs = pd .read_sql (max_msg_id_SQL , self .db )
150
150
msg_start = int (rs .iloc [0 ]["msg_id" ]) if rs .iloc [0 ]["msg_id" ] is not None else 25150
151
151
152
+ max_pr_labels_id_SQL = s .sql .text ("""
153
+ SELECT max(pr_label_id) AS label_id FROM pull_request_labels
154
+ """ )
155
+ rs = pd .read_sql (max_pr_labels_id_SQL , self .db )
156
+ label_start = int (rs .iloc [0 ]['label_id' ]) if rs .iloc [0 ]['label_id' ] else 25150
157
+
158
+ max_pr_event_id_SQL = s .sql .text ("""
159
+ SELECT MAX(pr_event_id) AS event_id FROM pull_request_events
160
+ """ )
161
+ rs = pd .read_sql (max_pr_event_id_SQL , self .db )
162
+ event_start = int (rs .iloc [0 ]['event_id' ]) if rs .iloc [0 ]['event_id' ] else 25150
163
+
152
164
# Increment so we are ready to insert the 'next one' of each of these most recent ids
153
165
self .pr_id_inc = (pr_start + 1 )
154
166
self .cntrb_id_inc = (cntrb_start + 1 )
155
167
self .msg_id_inc = (msg_start + 1 )
168
+ self .label_id_inc = (label_start + 1 )
169
+ self .event_id_inc = (event_start + 1 )
156
170
157
171
# self.run()
158
172
@@ -384,12 +398,121 @@ def query_pr(self, entry_info):
384
398
385
399
result = self .db .execute (self .pull_requests_table .insert ().values (pr ))
386
400
logging .info (f"Primary Key inserted pull_requests table: { result .inserted_primary_key } " )
387
- logging .info (f"Inserted PR data for { owner } /{ repo } " )
388
401
402
+ self .query_labels (pr_dict ['labels' ], self .pr_id_inc )
403
+ self .query_pr_events (owner , repo , pr_dict ['number' ], self .pr_id_inc )
404
+
405
+ logging .info (f"Inserted PR data for { owner } /{ repo } " )
389
406
self .results_counter += 1
390
407
self .pr_id_inc += 1
391
408
392
- self .register_task_completion (entry_info , 'pull_requests' )
409
+ self .register_task_completion (entry_info , 'pull_requests' )
410
+
411
def query_labels(self, labels, pr_id):
    """Store the labels of one pull request that are not already recorded.

    :param labels: sequence of label dicts; each one is read for the keys
        ``id``, ``node_id``, ``url``, ``name``, ``color`` and ``default``
        (presumably the ``labels`` array of a GitHub pull request payload --
        confirm against the caller).
    :param pr_id: value written to ``pull_request_id`` on every inserted row.
    :return: None. Every insert advances ``self.label_id_inc`` and
        ``self.results_counter`` by one.
    """
    logging.info('Querying PR Labels')

    # Already-stored rows are matched on the source label id: the augur
    # column `pr_src_id` is paired with the `id` key of the incoming dicts.
    # NOTE(review): the lookup is not restricted to `pr_id` here, so the
    # duplicate check looks table-wide -- confirm that is intended.
    source_key = 'id'
    stored_labels = self.get_table_values({'pr_src_id': source_key},
                                          ['pull_request_labels'])
    fresh_labels = self.check_duplicates(labels, stored_labels, source_key)

    if len(fresh_labels) == 0:
        logging.info('No new labels to add')
        return

    logging.info(f'Found {len(fresh_labels)} labels')

    for gh_label in fresh_labels:
        row = dict(
            pr_label_id=self.label_id_inc,
            pull_request_id=pr_id,
            pr_src_id=gh_label['id'],
            pr_src_node_id=gh_label['node_id'],
            pr_src_url=gh_label['url'],
            # The label's name is what ends up in the description column.
            pr_src_description=gh_label['name'],
            pr_src_color=gh_label['color'],
            pr_src_default_bool=gh_label['default'],
            tool_source=self.tool_source,
            tool_version=self.tool_version,
            data_source=self.data_source,
            data_collection_date=datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ'),
        )

        insert_stmt = self.pull_request_labels_table.insert().values(row)
        result = self.db.execute(insert_stmt)
        logging.info(f"Primary Key inserted in pull_request_labels table: {result.inserted_primary_key} ")
        logging.info(f"Inserted PR Labels data for PR with id {pr_id} ")

        # Keep the running totals and the next primary key in step with the insert.
        self.results_counter += 1
        self.label_id_inc += 1
449
+
450
def query_pr_events(self, owner, repo, gh_pr_no, pr_id):
    """Fetch the issue events of one pull request and insert the new ones.

    Pages through ``/repos/{owner}/{repo}/issues/{gh_pr_no}/events`` and
    stops at the first page that yields no new events or has no ``next``
    link. If any request or duplicate check raises, the error is logged and
    nothing is inserted for this pull request.

    :param owner: repository owner, used in the API URL.
    :param repo: repository name, used in the API URL.
    :param gh_pr_no: pull request number on GitHub, used in the API URL.
    :param pr_id: value written to ``pull_request_id`` on every inserted row.
    :return: None. Every insert advances ``self.event_id_inc`` and
        ``self.results_counter`` by one.
    """
    logging.info('Querying PR Events')

    # No whitespace between the path segments: it would end up in the
    # request path verbatim.
    url = (f'https://api.github.com/repos/{owner}/{repo}/issues/'
           f'{gh_pr_no}/events?per_page=100')

    # Duplicates are detected on the GitHub event id, which is stored in
    # `issue_event_src_id` (see the row built below). `pr_event_id` is the
    # locally generated counter and is not compared against GitHub ids.
    pseudo_key_gh = 'id'
    pseudo_key_augur = 'issue_event_src_id'
    table = 'pull_request_events'
    pr_events_table_values = self.get_table_values({pseudo_key_augur: pseudo_key_gh}, [table])

    pr_events = []
    try:
        while True:
            r = requests.get(url, headers=self.headers)
            self.update_rate_limit(r)

            j = r.json()

            new_pr_events = self.check_duplicates(j, pr_events_table_values, pseudo_key_gh)

            if len(new_pr_events) == 0:
                logging.info('No new PR Events to add... Exiting Pagination')
                break
            pr_events += new_pr_events

            if 'next' not in r.links:
                break
            url = r.links['next']['url']
    except Exception as e:
        # Best effort: a failed fetch skips this PR's events, it does not
        # abort the surrounding pull request collection.
        logging.error(f'Caught Exception on url {url} ')
        logging.error(str(e))
        logging.info(f'Not adding PR events for PR {pr_id} ')
        return

    for pr_event_dict in pr_events:

        # `actor` may be missing or present with a null value; a plain
        # membership test would let the null through and fail on the
        # `['login']` lookup.
        actor = pr_event_dict.get('actor')
        cntrb_id = self.find_id_from_login(actor['login']) if actor else None

        pr_event = {
            'pr_event_id': self.event_id_inc,
            'pull_request_id': pr_id,
            'cntrb_id': cntrb_id,
            'action': pr_event_dict['event'],
            'action_commit_hash': None,
            'created_at': pr_event_dict['created_at'],
            'issue_event_src_id': pr_event_dict['id'],
            'node_id': pr_event_dict['node_id'],
            'node_url': pr_event_dict['url'],
            'tool_source': self.tool_source,
            'tool_version': self.tool_version,
            'data_source': self.data_source,
            'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
        }

        result = self.db.execute(self.pull_request_events_table.insert().values(pr_event))
        logging.info(f"Primary Key inserted in pull_request_events table: {result.inserted_primary_key} ")
        logging.info(f"Inserted PR Events data for PR with id {pr_id} ")

        self.results_counter += 1
        self.event_id_inc += 1
393
516
394
517
def query_contributors (self , entry_info ):
395
518
0 commit comments