-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFMXStyleFeatures.py
157 lines (124 loc) · 6.66 KB
/
FMXStyleFeatures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import fnmatch
from pathlib import Path
import pandas as pd
from FMXStyleparser import ObjectParser
class FeatureMatrixGenerator:
    """
    Build a feature-presence matrix for the files found in a directory.

    Every file matching ``file_mask`` is run through ``extract_features``;
    the resulting feature names are collected per file and combined into a
    Pandas DataFrame with one row per feature and one column per file.
    Files are keyed by their stem (file name without extension), so two
    files that differ only by extension share a single entry.
    """

    def __init__(self, directory, file_mask='*'):
        """
        Initialize with a directory containing files and an optional file mask.

        The matching file list is computed once, at construction time.

        :param directory: The directory containing the files to process.
        :param file_mask: The fnmatch-style mask used to filter file names
            (default is '*', which means all files).
        """
        self.directory = directory
        self.file_mask = file_mask
        self.file_list = self._get_file_list()
        # Union of every feature seen in any file (filled by collect_features).
        self.all_features = set()
        # Maps file stem -> list of features extracted from that file.
        self.file_features = {}

    def _get_file_list(self):
        """
        Retrieve the files in the directory whose names match the file mask.

        Names are sorted because ``os.listdir`` returns entries in arbitrary
        order; sorting keeps matrix columns and reports stable between runs.
        Directories and other non-file entries are skipped.

        :return: A list of file paths (directory joined with file name).
        """
        candidates = (
            os.path.join(self.directory, name)
            for name in sorted(os.listdir(self.directory))
            if fnmatch.fnmatch(name, self.file_mask)
        )
        return [path for path in candidates if os.path.isfile(path)]

    def extract_features(self, file_path):
        """
        Extract the feature names of a single file.

        The file is read with ``ObjectParser`` and the second element of each
        entry returned by ``list_style_names()`` is lower-cased and used as a
        feature name.

        :param file_path: Path of the file to parse.
        :return: A list of lower-cased feature names (may contain duplicates).
        """
        parser = ObjectParser()
        # The return value is not needed here; presumably reading the file
        # populates the parser state that list_style_names() reports on
        # (TODO confirm against ObjectParser).
        parser.read_from_file(file_path)
        return [entry[1].lower() for entry in parser.list_style_names()]

    def collect_features(self):
        """
        Collect the features of every file and build the set of unique features.

        Fills ``file_features`` (keyed by file stem) and ``all_features``.
        """
        for file in self.file_list:
            features = self.extract_features(file)
            self.file_features[Path(file).stem] = features
            self.all_features.update(features)

    def print_not_implemented_features(self):
        """
        Print, for each file, the features found in other files but not in it.

        Relies on ``file_features`` and ``all_features``, which are filled by
        ``collect_features`` (called from ``generate_matrix``).
        """
        for file in self.file_list:
            implemented = set(self.file_features.get(Path(file).stem, []))
            missing = sorted(self.all_features - implemented)
            missing_str = ', '.join(missing) if missing else 'All features implemented'
            print(f"{os.path.basename(file):<30} {missing_str:<50}")
        print()

    def generate_matrix(self, sort_features=True):
        """
        Generate a feature matrix indicating the presence (1) or absence (0)
        of each feature in each file.

        Calls ``collect_features`` first, so the files are parsed on every call.

        :param sort_features: Whether to sort features alphabetically.
        :return: A Pandas DataFrame with features as rows and files as columns.
        """
        self.collect_features()

        if sort_features:
            ordered_features = sorted(self.all_features)
        else:
            ordered_features = list(self.all_features)

        feature_matrix = pd.DataFrame(
            0, index=ordered_features, columns=list(self.file_features.keys())
        )
        for file, features in self.file_features.items():
            # De-duplicate the labels and skip files without any feature so
            # the label-based assignment always receives a clean, non-empty list.
            present = sorted(set(features))
            if present:
                feature_matrix.loc[present, file] = 1
        return feature_matrix

    def save_matrix_markdown(self, matrix, filename='feature_matrix.md', title='Feature Matrix', description='This matrix shows the presence (1) or absence (0) of features across different files.'):
        """
        Save the feature matrix to a Markdown file with a title, description,
        and the not implemented features for each file.

        The file is written as UTF-8 so feature names containing non-ASCII
        characters are stored correctly regardless of the platform's default
        encoding.

        :param matrix: The DataFrame containing the feature matrix.
        :param filename: The name of the file to save the matrix.
        :param title: The title of the Markdown document.
        :param description: A descriptive paragraph for the matrix.
        """
        with open(filename, 'w', encoding='utf-8') as md_file:
            # Title (Markdown header syntax) followed by the description paragraph.
            md_file.write(f"# {title}\n\n")
            md_file.write(f"{description}\n\n")

            # Table header: one column for the feature name, then one per file.
            header = ['Feature'] + [str(column) for column in matrix.columns]
            md_file.write('| ' + ' | '.join(header) + ' |\n')
            md_file.write('| ' + ' | '.join(['---'] * len(header)) + ' |\n')

            # Table body: anything that is not exactly 1 is rendered as '0'.
            for index, row in matrix.iterrows():
                cells = ['1' if val == 1 else '0' for val in row]
                md_file.write(f'| {index} | ' + ' | '.join(cells) + ' |\n')
            md_file.write('\n')  # Blank line after the table

            # Per-file section listing the features marked 0 in that column.
            for file in matrix.columns:
                missing = [str(name) for name in matrix.index[matrix[file] == 0].tolist()]
                md_file.write(f"### {file}\n\n")
                if missing:
                    md_file.write(f"**Not implemented features in {file}:**\n\n")
                    md_file.write("- " + '\n- '.join(missing) + '\n\n')
                else:
                    md_file.write(f"All features are implemented in {file}.\n\n")

        print(f"Feature matrix and not implemented features saved as '{filename}' in Markdown format")
if __name__ == "__main__":
    # Demo run: build the matrix for the style files in ./StylesIn/,
    # show it on the console and export it as a Markdown report.
    directory = './StylesIn/'  # Replace with your actual directory path
    file_mask = '*.Style'  # Any fnmatch-style mask works (e.g., '*.csv', '*.json')

    generator = FeatureMatrixGenerator(directory, file_mask)
    feature_matrix = generator.generate_matrix(sort_features=True)

    # Make pandas print the complete matrix: every row, every column,
    # and a display width large enough to avoid wrapping.
    display_options = (
        ('display.max_rows', None),
        ('display.max_columns', None),
        ('display.width', 1000),
    )
    for option_name, option_value in display_options:
        pd.set_option(option_name, option_value)

    # Console output: the matrix itself, then the per-file missing features.
    print(feature_matrix)
    generator.print_not_implemented_features()

    # Markdown export with a custom title and description.
    title = "FMX Vector Style Feature Matrix"
    description = "This document provides an overview of the features supported by each file in the directory."
    generator.save_matrix_markdown(
        feature_matrix,
        'FMX Vector Style Feature Matrix.md',
        title=title,
        description=description,
    )