-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsv2xlsx.py
149 lines (128 loc) · 7.21 KB
/
csv2xlsx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""
File: csv2xlsx.py
Description: The csv2xlsx converter processes all the generated CSV files and converts them into a single XLSX file. It also
highlights the keyword in the paragraph and requirement columns. The output XLSX file will have three sheets:
- new_requirements
- latency_paragraphs
- latency_no_paragraphs
Contributors:
Amirpooya Asadollahnejad
David Schön
Marcelo Santibáñez
Adrian Hassa
Created: 2024-10-03
Last Modified: 2024-12-10
Project: 3GPP Requirement Tools
URL: https://github.com/Adrian2901/3gpp-requirements-tools
License: MIT License (see LICENSE file for details)
"""
import csv
import xlsxwriter
import re
def highlight_keyword(paragraph, column, keyword, sheet, row_idx, bold_format, wrap_format):
matches = list(re.finditer(f'({keyword})', paragraph, flags=re.IGNORECASE)) # Use finditer for all matches
rich_text = []
start_index = 0
for match in matches: # Logic of this loop comes from CHAT-GPT
# Add the text before the match
if match.start() > start_index:
rich_text.append(paragraph[start_index:match.start()]) # Normal text
# Append the matched keyword with bold format
rich_text.append(bold_format) # Bold format for keyword
rich_text.append(match.group(0)) # The actual keyword
start_index = match.end() # Update start_index to end of match
# Add any remaining text after the last match
if start_index < len(paragraph):
rich_text.append(paragraph[start_index:]) # Normal text
# Write the rich text back to the 'Paragraph' cell with wrapping
if rich_text:
sheet.write_rich_string(row_idx, column, *rich_text) # Column 2: Paragraph
else:
# Write paragraph normally with wrapping if the keyword is not found
sheet.write(row_idx, column, paragraph, wrap_format)
sheet.set_row(row_idx, None, wrap_format) # Enable text wrapping for the entire row
# Defautl input_csv is a list of the three csv files containing the default names we used in the project
def csv_to_xlsx(config, update):
keyword = config['keywords'][0] # keyword to highlight TODO: iterate thru the list of keywords
latency_possible = config["output_folder_path"] + "/possible_paragraphs.csv"
latency_no = config["output_folder_path"] + "/no_paragraphs.csv"
new_requirements = config["output_folder_path"] + "/new_requirements.csv"
output_xlsx = config["output_folder_path"] + "/output.xlsx" #output xlsx file
update("Processing CSV to XLSX...")
print(latency_possible)
print(latency_no)
print(new_requirements)
# different input files
# 1 and 2 have the same structure so different logic is only needed for 0
# Create a new XLSX file
workbook = xlsxwriter.Workbook(output_xlsx)
# Add a format for the header cells
header_format = workbook.add_format({'bold': True, 'align': 'center', 'valign': 'vcenter', 'fg_color': '#D7E4BC'})
# wrap text
wrap_format = workbook.add_format({'text_wrap': True, 'valign': 'top'})
# bold format for the keyword
bold_format = workbook.add_format({'bold': True})
# Create a sheet for each input file
workbook.add_worksheet("new_requirements")
workbook.add_worksheet("latency_possible_paragraphs")
workbook.add_worksheet("latency_no_paragraphs")
# Create columns for the sheets
for sheet in workbook.worksheets():
sheet.write_row(0, 0, ["File", "Chapter", "Paragraph", "LLM Response"], header_format) # headers
sheet.set_column('A:A', 10) # File column
sheet.set_column('B:B', 18) # Chapter column
sheet.set_column('C:C', 45, wrap_format) # Paragraph column
sheet.set_column('D:D', 10) # LLM Response column
if sheet.get_name() == "new_requirements":
sheet.write(0, 4, "Requirement", header_format) # Requirement column
sheet.set_column('E:E', 45) # Requirement column
# read new_requirements.csv and write to its sheet
with open(new_requirements, 'r', encoding='utf-8') as f:
reader = csv.reader(f, delimiter=';')
for row in reader:
file, chapter, paragraph, llm_response, requirement = row
sheet = workbook.get_worksheet_by_name("new_requirements") # Get the sheet
row_idx = sheet.dim_rowmax + 1 # Get the next available row (after the header)
sheet.write(row_idx, 0, file) # Column 0: File
sheet.write(row_idx, 1, chapter) # Column 1: Chapter
sheet.write(row_idx, 2, paragraph, wrap_format) # Column 2: Paragraph
sheet.write(row_idx, 3, llm_response) # Column 3: LLM Response
sheet.write(row_idx, 4, requirement, wrap_format) # Column 4: Requirement
# Check if the keyword is in the paragraph and highlight it
column = 2 #arbitrarily passing the column number now, 2 is the paragraph column
highlight_keyword(paragraph, column, keyword, sheet, row_idx, bold_format, wrap_format)
# highlight keyword for the requirement column
column = 4 #column 4 = requirement column
highlight_keyword(requirement, column, keyword, sheet, row_idx, bold_format, wrap_format)
# read latency_possible.csv and write to its sheet
with open(latency_possible, 'r', encoding='utf-8') as f:
reader = csv.reader(f, delimiter=';')
for row in reader:
file, chapter, paragraph, llm_response = row
sheet = workbook.get_worksheet_by_name("latency_possible_paragraphs")
row_idx = sheet.dim_rowmax + 1 # Get the next available row (after the header)
sheet.write(row_idx, 0, file) # Column 0: File
sheet.write(row_idx, 1, chapter) # Column 1: Chapter
sheet.write(row_idx, 2, paragraph, wrap_format) # Column 2: Paragraph
sheet.write(row_idx, 3, llm_response) # Column 3: LLM Response
# Check if the keyword is in the paragraph and highlight it
column = 2 # paragraph column
highlight_keyword(paragraph, column, keyword, sheet, row_idx, bold_format, wrap_format)
# read latency_no.csv and write to its sheet
with open(latency_no, 'r', encoding='utf-8') as f:
reader = csv.reader(f, delimiter=';')
for row in reader:
file, chapter, paragraph, llm_response = row
sheet = workbook.get_worksheet_by_name("latency_no_paragraphs")
row_idx = sheet.dim_rowmax + 1 # Get the next available row (after the header)
sheet.write(row_idx, 0, file) # Column 0: File
sheet.write(row_idx, 1, chapter) # Column 1: Chapter
sheet.write(row_idx, 2, paragraph, wrap_format) # Column 2: Paragraph
sheet.write(row_idx, 3, llm_response) # Column 3: LLM Response
column = 2 # paragraph column
highlight_keyword(paragraph, column, keyword, sheet, row_idx, bold_format, wrap_format)
#close the workbook
workbook.close()
update("Done! Check the output folder for the results.")
if __name__ == "__main__":
print("This is a script that re-structures the output.\nUsage: python gui_requirements.py")