Merge pull request #678 from UTDNebula/feat/diff

akevinge · web-flow · commit c0fb5be43bad · 2023-09-26T14:50:26.000-05:00
NP-92 Create diffing script for degrees
diff --git a/.github/workflows/versioning.yml b/.github/workflows/versioning.yml
@@ -0,0 +1,28 @@
+name: Scheduled Versioning
+
+# Runs the degree-requirement diff script once a year, or on demand.
+on:
+  workflow_dispatch:
+  schedule:
+    # Midnight on August 15 every year
+    - cron: '0 0 15 8 *'
+
+jobs:
+  versioning:
+    name: Versioning System
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        working-directory: validator
+        run: |
+          pip3 install -r requirements.txt
+
+      - name: Execute versioning script
+        working-directory: ./validator/scripts
+        # diff.py reads JIRA_API_KEY from the environment; a secret is only
+        # visible to a step when it is mapped explicitly.
+        # NOTE(review): confirm the repository secret is named JIRA_API_KEY.
+        env:
+          JIRA_API_KEY: ${{ secrets.JIRA_API_KEY }}
+        run: python diff.py
diff --git a/validator/requirements.txt b/validator/requirements.txt
@@ -1,6 +1,7 @@
 absl-py==1.2.0
 async-timeout==4.0.2
 attrs==22.2.0
+beautifulsoup4==4.12.0
 black==23.1.0
 certifi==2023.7.22
 charset-normalizer==2.1.1
@@ -17,6 +18,7 @@ idna==3.4
 iniconfig==2.0.0
 itsdangerous==2.1.2
 Jinja2==3.1.2
+jira==3.5.2
 jsonschema==4.17.3
 limits==2.8.0
 MarkupSafe==2.1.1
@@ -43,8 +45,10 @@ ruamel.yaml==0.17.21
 ruamel.yaml.clib==0.2.7
 six==1.16.0
 tomli==2.0.1
+types-beautifulsoup4==4.12.0.6
 types-Flask-Cors==3.0.10.2
 types-jsonschema==4.17.0.6
+types-requests==2.31.0.2
 typing_extensions==4.4.0
 urllib3==1.26.13
 Werkzeug==2.2.3
diff --git a/validator/scripts/diff.py b/validator/scripts/diff.py
@@ -0,0 +1,106 @@
+import requests
+import json
+import re
+import os
+import difflib
+from bs4 import BeautifulSoup
+from dotenv import load_dotenv
+from jira import JIRA
+
+"""This script looks through all major/concentration
+JSON files to find whether any requirements have changed
+over the past year. If so, it raises a JIRA ticket with
+the requirement change information.
+"""
+
+load_dotenv()
+jira_api_key = os.getenv('JIRA_API_KEY')
+major_json_path = "/home/runner/work/planner/planner/validator/degree_data"
+
+#Extracts html from url and sends it to course extractor
+def get_req_content(url: str) -> str:
+    response = requests.get(url)
+    if(response.status_code == 200):
+        return response.text
+    else:
+        return "Webpage not found"
+
+#Extracts the courses from each major and sends them to a set
+def extract_courses(webData: str) -> set[str]:
+    bs = BeautifulSoup(webData, features="html.parser")
+    courses = set()
+    course_elements = bs.find_all('a', href=True)
+
+    for course_element in course_elements:
+        course_name = course_element.text.strip()
+        pattern = r'\b[A-Z]{2,4} \d{4}\b'
+        
+        if re.search(pattern, course_name):
+            courses.add(course_name)
+    return courses
+
+#Diffs between webpages and works with the course diff sets
+def htmldiff(previousYearURL: str, currentYearURL: str, oldCourses: set[str], newCourses: set[str]) -> str:
+    oldContent = get_req_content(previousYearURL)
+    newContent = get_req_content(currentYearURL)
+
+    oldCourses.update(extract_courses(oldContent))
+    newCourses.update(extract_courses(newContent))
+    
+    bsOld = BeautifulSoup(oldContent, features="lxml").find('div', attrs = {'id':'bukku-page'})
+    bsNew = BeautifulSoup(newContent, features="lxml").find('div', attrs = {'id':'bukku-page'})
+
+    if bsNew is None or bsOld is None:
+        return ""
+
+    bsOldLines = bsOld.get_text().split('\n')
+    bsNewLines = bsNew.get_text().split('\n')
+
+    diff = difflib.ndiff(bsOldLines, bsNewLines)
+    diffString = "```"
+    for line in diff:
+        diffString+=line+'\n'
+
+    return diffString + "```"
+
+#Creates a ticket based on issue type, including URI and impacted courses in ticket
+#C issue type = Course renamed/added/removed
+#R issue type = Major/concentration removed
+def createTicket(issueType: str, jira_connection: JIRA, URI: str, coursesImpacted: set[str], diffCodeBlock: str) -> None:
+    description = "This is an automated diff script used to detect discrepancies between major requirements\nURI: " + URI + "\n"
+    description += "Major: " + URI.split("/")[-1] + "\n"
+    if issueType == 'R':
+        description += "This major/concentration has been renamed or removed\n\n"
+    elif issueType == 'C':
+        description += "The following course(s) have been renamed/added/removed:\n" + str(coursesImpacted) + "\n\n"
+        description+="Below is a preview of the diff:\n" + diffCodeBlock
+    jira_connection.create_issue(
+        project='NP',
+        summary='Course requirement version changes',
+        description=description,
+        issuetype={'name': 'Task'}
+    )
+
+#Establishes JIRA connection and ierates through each major for versioning issues
+if __name__ == "__main__":
+    jira_connection = JIRA(
+        basic_auth=('planner@utdnebula.com', jira_api_key),
+        server="https://nebula-labs.atlassian.net"
+    )
+    for majorReqJson in os.scandir(major_json_path):
+        data = json.loads(open(f"/home/runner/work/planner/planner/validator/degree_data/" + majorReqJson.name, "r").read())
+        catalog_uri=data["catalog_uri"]
+        yearRegex = r'/(\d{4})/'
+        result = re.search(yearRegex, catalog_uri)
+        if result:
+            match = str(int(result.group(1))+1)
+            previousYearURL = data["catalog_uri"]
+            currentYearURL = re.sub(yearRegex, f'/{ str(match) }/', data["catalog_uri"])
+            oldCourses: set[str] = set()
+            newCourses: set[str] = set()
+            pageDiff = htmldiff(previousYearURL, currentYearURL, oldCourses, newCourses)
+            if len(newCourses) == 0:
+                createTicket('R', jira_connection, re.sub(yearRegex, f'/{ match }/', data["catalog_uri"]), set(), pageDiff)
+            else:
+                createTicket('C', jira_connection, re.sub(yearRegex, f'/{ match }/', data["catalog_uri"]), (newCourses-oldCourses).union(oldCourses-newCourses), pageDiff)
+