Skip to content

Commit c0fb5be

Browse files
authored
Merge pull request #678 from UTDNebula/feat/diff
NP-92 Create diffing script for degrees
2 parents 5f83c12 + 737d268 commit c0fb5be

File tree

3 files changed

+138
-0
lines changed

3 files changed

+138
-0
lines changed

.github/workflows/versioning.yml

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Runs the degree-requirement diffing script once a year (and on demand).
name: Scheduled Versioning

on:
  # Allows the job to be started manually from the Actions tab.
  workflow_dispatch:
  schedule:
    # minute hour day-of-month month day-of-week: 00:00 UTC on 15 August.
    - cron: '0 0 15 8 *'

jobs:
  versioning:
    name: Versioning System
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        working-directory: validator
        run: |
          pip3 install -r requirements.txt

      - name: Execute versioning script
        working-directory: ./validator/scripts
        # diff.py reads its JIRA token from the JIRA_API_KEY environment
        # variable; secrets are not exposed to steps unless mapped explicitly.
        # NOTE(review): the secret name is an assumption - confirm it matches
        # the repository/organisation secret.
        env:
          JIRA_API_KEY: ${{ secrets.JIRA_API_KEY }}
        run: python diff.py

validator/requirements.txt

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
absl-py==1.2.0
22
async-timeout==4.0.2
33
attrs==22.2.0
4+
beautifulsoup4==4.12.0
45
black==23.1.0
56
certifi==2023.7.22
67
charset-normalizer==2.1.1
@@ -17,6 +18,7 @@ idna==3.4
1718
iniconfig==2.0.0
1819
itsdangerous==2.1.2
1920
Jinja2==3.1.2
21+
jira==3.5.2
2022
jsonschema==4.17.3
2123
limits==2.8.0
2224
MarkupSafe==2.1.1
@@ -43,8 +45,10 @@ ruamel.yaml==0.17.21
4345
ruamel.yaml.clib==0.2.7
4446
six==1.16.0
4547
tomli==2.0.1
48+
types-beautifulsoup4==4.12.0.6
4649
types-Flask-Cors==3.0.10.2
4750
types-jsonschema==4.17.0.6
51+
types-requests==2.31.0.2
4852
typing_extensions==4.4.0
4953
urllib3==1.26.13
5054
Werkzeug==2.2.3

validator/scripts/diff.py

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import requests
2+
import json
3+
import re
4+
import os
5+
import difflib
6+
from bs4 import BeautifulSoup
7+
from dotenv import load_dotenv
8+
from jira import JIRA
9+
10+
"""This script looks through all major/concentration
11+
json files to find if any requirements have changed
12+
over the year. If so, it raises a JIRA ticket with
13+
requirement change information
14+
"""
15+
16+
load_dotenv()
17+
jira_api_key = os.getenv('JIRA_API_KEY')
18+
major_json_path = "/home/runner/work/planner/planner/validator/degree_data"
19+
20+
#Fetches the page at url and returns its HTML text, or "Webpage not found" on a non-200 response
21+
def get_req_content(url: str) -> str:
    """Download a catalog page and return its HTML as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body when the server answers with HTTP 200. In every
        other case - a non-200 status, or a request that fails outright
        (connection error, timeout, invalid URL) - the sentinel string
        "Webpage not found" is returned, so one unreachable page does not
        abort the whole run.
    """
    try:
        # requests applies no timeout by default; without one an unresponsive
        # server would block the scheduled job indefinitely.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        return "Webpage not found"

    if response.status_code == 200:
        return response.text
    return "Webpage not found"
27+
28+
#Extracts the courses from each major and sends them to a set
29+
def extract_courses(webData: str) -> set[str]:
    """Return the text of every hyperlink that mentions a course code.

    A course code is two to four capital letters, a space and four digits
    (for example "CS 1337"). Only ``<a>`` tags carrying an ``href`` are
    inspected, and the whole stripped link text is stored - not just the
    part that matched.

    Args:
        webData: HTML source of a catalog page.

    Returns:
        The distinct link texts containing a course code.
    """
    course_code = re.compile(r'\b[A-Z]{2,4} \d{4}\b')
    soup = BeautifulSoup(webData, features="html.parser")
    link_texts = (anchor.text.strip() for anchor in soup.find_all('a', href=True))
    return {text for text in link_texts if course_code.search(text)}
41+
42+
#Diffs between webpages and works with the course diff sets
43+
def htmldiff(previousYearURL: str, currentYearURL: str, oldCourses: set[str], newCourses: set[str]) -> str:
    """Diff the catalog text of two years and collect each year's courses.

    Args:
        previousYearURL: Catalog page to use as the "old" side of the diff.
        currentYearURL: Catalog page to use as the "new" side of the diff.
        oldCourses: Output set, updated in place with the courses found on
            the previous-year page.
        newCourses: Output set, updated in place with the courses found on
            the current-year page.

    Returns:
        A line-by-line ``difflib.ndiff`` of the text inside the element
        ``<div id="bukku-page">`` of both pages, wrapped in triple backticks.
        An empty string is returned when either page has no such element
        (the course sets are still updated in that case).
    """
    oldContent = get_req_content(previousYearURL)
    newContent = get_req_content(currentYearURL)

    oldCourses.update(extract_courses(oldContent))
    newCourses.update(extract_courses(newContent))

    # html.parser ships with Python and is the parser extract_courses already
    # uses. The previous "lxml" backend needs a separate package that is not
    # pinned in the visible part of requirements.txt, and BeautifulSoup raises
    # FeatureNotFound when a requested parser is not installed.
    page_selector = {'id': 'bukku-page'}
    old_page = BeautifulSoup(oldContent, features="html.parser").find('div', attrs=page_selector)
    new_page = BeautifulSoup(newContent, features="html.parser").find('div', attrs=page_selector)

    if new_page is None or old_page is None:
        return ""

    diff_lines = difflib.ndiff(
        old_page.get_text().split('\n'),
        new_page.get_text().split('\n'),
    )
    # One join instead of repeated string concatenation inside a loop.
    return "```" + "".join(line + '\n' for line in diff_lines) + "```"
65+
66+
#Creates a ticket based on issue type, including URI and impacted courses in ticket
67+
#C issue type = Course renamed/added/removed
68+
#R issue type = Major/concentration removed
69+
def createTicket(issueType: str, jira_connection: JIRA, URI: str, coursesImpacted: set[str], diffCodeBlock: str) -> None:
    """File a JIRA task in project NP describing a detected catalog change.

    Args:
        issueType: 'R' when the major/concentration was renamed or removed,
            'C' when courses were renamed/added/removed. Any other value
            produces a ticket without a type-specific paragraph.
        jira_connection: Client whose ``create_issue`` method is called.
        URI: Catalog address of the degree; its last path segment is
            reported as the major.
        coursesImpacted: Courses to list in a 'C' ticket (ignored otherwise).
        diffCodeBlock: Pre-formatted diff appended at the end of the
            description.
    """
    parts = [
        "This is an automated diff script used to detect discrepancies between major requirements\n",
        "URI: " + URI + "\n",
        "Major: " + URI.split("/")[-1] + "\n",
    ]

    if issueType == 'R':
        parts.append("This major/concentration has been renamed or removed\n\n")
    elif issueType == 'C':
        # A sorted, comma-separated list reads better in a ticket than the
        # repr of a set, and its order no longer varies between runs.
        course_list = ", ".join(sorted(coursesImpacted))
        parts.append("The following course(s) have been renamed/added/removed:\n" + course_list + "\n\n")

    parts.append("Below is a preview of the diff:\n" + diffCodeBlock)

    jira_connection.create_issue(
        project='NP',
        summary='Course requirement version changes',
        description="".join(parts),
        issuetype={'name': 'Task'},
    )
83+
84+
#Establishes JIRA connection and iterates through each major for versioning issues
85+
def main() -> None:
    """Compare every stored degree with next year's catalog and file tickets.

    For each file in ``major_json_path`` the ``catalog_uri`` field is read,
    the four-digit year in it is incremented by one, and both pages are
    compared through ``htmldiff``. An 'R' ticket is filed when the next-year
    page yields no courses at all; a 'C' ticket is filed when the two course
    sets differ. Degrees whose URI contains no ``/YYYY/`` segment are skipped.
    """
    # NOTE(review): the account address below looks like a redacted
    # placeholder in this copy of the file - confirm the real value.
    jira_connection = JIRA(
        basic_auth=('[email protected]', jira_api_key),
        server="https://nebula-labs.atlassian.net"
    )
    year_pattern = re.compile(r'/(\d{4})/')

    with os.scandir(major_json_path) as entries:
        for majorReqJson in entries:
            if not majorReqJson.is_file():
                continue

            # entry.path already joins major_json_path with the file name, so
            # the directory is no longer spelled out a second time; the
            # context manager closes the handle the old one-liner leaked.
            with open(majorReqJson.path, "r", encoding="utf-8") as degree_file:
                data = json.load(degree_file)

            previousYearURL = data["catalog_uri"]
            result = year_pattern.search(previousYearURL)
            if not result:
                continue

            next_year = int(result.group(1)) + 1
            currentYearURL = year_pattern.sub(f'/{next_year}/', previousYearURL)

            oldCourses: set[str] = set()
            newCourses: set[str] = set()
            pageDiff = htmldiff(previousYearURL, currentYearURL, oldCourses, newCourses)

            if not newCourses:
                createTicket('R', jira_connection, currentYearURL, set(), pageDiff)
                continue

            # Courses present in exactly one of the two years.
            changedCourses = newCourses ^ oldCourses
            # Previously a 'C' ticket was raised for every degree, even when
            # both years listed the same courses. NOTE(review): differences
            # in page text alone no longer raise a ticket, since neither
            # issue type describes them.
            if changedCourses:
                createTicket('C', jira_connection, currentYearURL, changedCourses, pageDiff)


if __name__ == "__main__":
    main()
106+

0 commit comments

Comments
 (0)