Skip to content

Commit 77d8632

Browse files
committed
inline code documentation updates and removed textfile as dsi reader
1 parent c6bef32 commit 77d8632

File tree

5 files changed

+75
-100
lines changed

5 files changed

+75
-100
lines changed

dsi/backends/sqlite.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ def check_type(self, input_list):
7272
return " VARCHAR"
7373
return ""
7474

75+
# OLD NAME OF ingest_table_helper. TO BE DEPRECATED IN FUTURE DSI RELEASE
76+
def put_artifact_type(self, types, foreign_query = None, isVerbose=False):
77+
self.ingest_table_helper(types, foreign_query, isVerbose)
78+
7579
def ingest_table_helper(self, types, foreign_query = None, isVerbose=False):
7680
"""
7781
Helper function to create SQLite table based on the passed in schema. Used within ingest_artifact()
@@ -113,13 +117,9 @@ def ingest_table_helper(self, types, foreign_query = None, isVerbose=False):
113117
self.cur.execute(str_query)
114118
self.types = types
115119

116-
# OLD NAME OF ingest_table_helper. TO BE DEPRECATED IN FUTURE DSI RELEASE
117-
def put_artifact_type(self, types, foreign_query = None, isVerbose=False):
118-
self.ingest_table_helper(self, types, foreign_query, isVerbose)
119-
120120
# OLD NAME OF ingest_artifacts(). TO BE DEPRECATED IN FUTURE DSI RELEASE
121121
def put_artifacts(self, collection, isVerbose=False):
122-
return self.ingest_artifacts(self, collection, isVerbose)
122+
return self.ingest_artifacts(collection, isVerbose)
123123

124124
def ingest_artifacts(self, collection, isVerbose=False):
125125
"""
@@ -681,6 +681,8 @@ def close(self):
681681
# OLD FUNCTION TO DEPRECATE
682682
def put_artifacts_t(self, collection, tableName="TABLENAME", isVerbose=False):
683683
"""
684+
DSI 1.0 FUNCTIONALITY - DEPRECATING SOON, DO NOT USE
685+
684686
Primary class for insertion of collection of Artifacts metadata into a defined schema, with a table passthrough
685687
686688
`collection`: A Python Collection of an Artifact derived class that has multiple regular structures of a defined schema,

dsi/core.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class Terminal():
2626
PLUGIN_PREFIX = ['dsi.plugins']
2727
PLUGIN_IMPLEMENTATIONS = ['env', 'file_reader', 'file_writer']
2828
VALID_ENV = ['Hostname', 'SystemKernel', 'GitInfo']
29-
VALID_READERS = ['Bueno', 'Csv', 'YAML1', 'TOML1', 'Schema', 'TextFile', 'MetadataReader1', 'Wildfire']
29+
VALID_READERS = ['Bueno', 'Csv', 'YAML1', 'TOML1', 'Schema', 'MetadataReader1', 'Wildfire']
3030
VALID_WRITERS = ['ER_Diagram', 'Table_Plot', 'Csv_Writer']
3131
VALID_PLUGINS = VALID_ENV + VALID_READERS + VALID_WRITERS
3232
VALID_BACKENDS = ['Gufi', 'Sqlite', 'Parquet']
@@ -41,14 +41,13 @@ def __init__(self, debug = 0, backup_db = False, runTable = False):
4141
4242
Optional flags can be set and defined:
4343
44-
`debug`: {0: off, 1: user debug log, 2: user + developer debug log}.
45-
When set to 1 or 2, debug info will write to a local debug.log text file with various benchmarks.
46-
44+
`debug`: {0: off, 1: user debug log, 2: user + developer debug log}
45+
46+
- When set to 1 or 2, debug info will write to a local debug.log text file with various benchmarks.
4747
`backup_db`: Undefined False as default. If set to True, this creates a backup database before committing new changes.
4848
4949
`runTable`: Undefined False as default.
50-
When new metadata is ingested, a 'runTable' is created, appended, and timestamped when database in incremented.
51-
Recommended for in-situ use-cases.
50+
When new metadata is ingested, a 'runTable' is created, appended, and timestamped when the database is incremented. Recommended for in-situ use-cases.
5251
"""
5352
def static_munge(prefix, implementations):
5453
return (['.'.join(i) for i in product(prefix, implementations)])
@@ -278,6 +277,7 @@ def add_external_python_module(self, mod_type, mod_name, mod_path):
278277
"""
279278
mod = SourceFileLoader(mod_name, mod_path).load_module()
280279
self.module_collection[mod_type][mod_name] = mod
280+
self.VALID_MODULES.append(mod_name)
281281

282282
def list_loaded_modules(self):
283283
"""

dsi/plugins/file_reader.py

+55-88
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class Csv(FileReader):
3939
"""
4040
def __init__(self, filenames, table_name = None, **kwargs):
4141
"""
42-
Initializes CSV Reader with user specified parameters. INCLUDE NECESSARY PARAMETERS WHEN LOADING THIS PLUGIN WITH CORE.LOAD_MODULE()
42+
Initializes CSV Reader with user specified parameters.
4343
4444
`filenames`: Required input. List of CSV files, or just one CSV file to store in DSI. If a list, data in all files must be for the same table
4545
@@ -75,74 +75,6 @@ def add_rows(self) -> None:
7575

7676
self.set_schema_2(self.csv_data)
7777

78-
class Wildfire(FileReader):
79-
"""
80-
A Structured Data Plugin to ingest Wildfire data stored as a CSV
81-
82-
Can be used for other cases if data is post-processed and running only once.
83-
84-
Can create a manual simulation table
85-
"""
86-
def __init__(self, filenames, table_name = None, sim_table = False, **kwargs):
87-
"""
88-
Initializes Wildfire Reader with user specified parameters.
89-
90-
`filenames`: Required input -- Wildfire data files
91-
92-
`table_name`: default None. User can specify table name when loading the wildfire file.
93-
94-
`sim_table`: default False. Set to True if creating manual simulation table where each row of Wildfire file is a separate sim
95-
96-
- also creates new column in wildfire data for each row to associate to a corresponding row/simulation in sim_table
97-
"""
98-
super().__init__(filenames, **kwargs)
99-
self.csv_data = OrderedDict()
100-
if isinstance(filenames, str):
101-
self.filenames = [filenames]
102-
else:
103-
self.filenames = filenames
104-
self.table_name = table_name
105-
self.sim_table = sim_table
106-
107-
def add_rows(self) -> None:
108-
"""
109-
Creates Ordered Dictionary for the wildfire data.
110-
111-
If sim_table = True, a sim_table Ordered Dict also created, and both are nested within a larger Ordered Dict.
112-
"""
113-
if self.table_name is None:
114-
self.table_name = "Wildfire"
115-
116-
total_df = DataFrame()
117-
for filename in self.filenames:
118-
temp_df = read_csv(filename)
119-
try:
120-
total_df = concat([total_df, temp_df], axis=0, ignore_index=True)
121-
except:
122-
return (ValueError, f"Error in adding {filename} to the existing wildfire data. Please recheck column names and data structure")
123-
124-
if self.sim_table:
125-
total_df['sim_id'] = range(1, len(total_df) + 1)
126-
total_df = total_df[['sim_id'] + [col for col in total_df.columns if col != 'sim_id']]
127-
128-
total_data = OrderedDict(total_df.to_dict(orient='list'))
129-
for col, coldata in total_data.items(): # replace NaNs with None
130-
total_data[col] = [None if type(item) == float and isnan(item) else item for item in coldata]
131-
132-
self.csv_data[self.table_name] = total_data
133-
134-
if self.sim_table:
135-
sim_list = list(range(1, len(total_df) + 1))
136-
sim_dict = OrderedDict([('sim_id', sim_list)])
137-
self.csv_data["simulation"] = sim_dict
138-
139-
relation_dict = OrderedDict([('primary_key', []), ('foreign_key', [])])
140-
relation_dict["primary_key"].append(("simulation", "sim_id"))
141-
relation_dict["foreign_key"].append((self.table_name, "sim_id"))
142-
self.csv_data["dsi_relations"] = relation_dict
143-
144-
self.set_schema_2(self.csv_data)
145-
14678
class Bueno(FileReader):
14779
"""
14880
A Structured Data Plugin to capture performance data from Bueno (github.com/lanl/bueno)
@@ -449,37 +381,72 @@ def add_rows(self) -> None:
449381
del self.toml_data["dsi_units"]
450382
self.set_schema_2(self.toml_data)
451383

452-
class TextFile(FileReader):
384+
class Wildfire(FileReader):
453385
"""
454-
Structured Data Plugin to read in an individual or a set of text files
386+
A Structured Data Plugin to ingest Wildfire data stored as a CSV
455387
456-
Table names are the keys for the main ordered dictionary and column names are the keys for each table's nested ordered dictionary
388+
Can be used for other cases if data is post-processed and running only once.
389+
Can create a manual simulation table
457390
"""
458-
def __init__(self, filenames, target_table_prefix = None, **kwargs):
391+
def __init__(self, filenames, table_name = None, sim_table = False, **kwargs):
459392
"""
460-
`filenames`: one text file or a list of text files to be ingested
393+
Initializes Wildfire Reader with user specified parameters.
394+
395+
`filenames`: Required input -- Wildfire data files
396+
397+
`table_name`: default None. User can specify table name when loading the wildfire file.
461398
462-
`target_table_prefix`: prefix to be added to every table created to differentiate between other text file sources
399+
`sim_table`: default False. Set to True if creating manual simulation table where each row of Wildfire file is a separate sim
400+
401+
- also creates new column in wildfire data for each row to associate to a corresponding row/simulation in sim_table
463402
"""
464403
super().__init__(filenames, **kwargs)
404+
self.csv_data = OrderedDict()
465405
if isinstance(filenames, str):
466-
self.text_files = [filenames]
406+
self.filenames = [filenames]
467407
else:
468-
self.text_files = filenames
469-
self.text_file_data = OrderedDict()
470-
self.target_table_prefix = target_table_prefix
408+
self.filenames = filenames
409+
self.table_name = table_name
410+
self.sim_table = sim_table
471411

472412
def add_rows(self) -> None:
413+
"""
414+
Creates Ordered Dictionary for the wildfire data.
415+
416+
If sim_table = True, a sim_table Ordered Dict also created, and both are nested within a larger Ordered Dict.
473417
"""
474-
Parses text file data and creates an ordered dict whose keys are table names and values are an ordered dict for each table.
475-
"""
476-
for filename in self.text_files:
477-
df = read_csv(filename)
478-
if self.target_table_prefix is not None:
479-
self.text_file_data[f"{self.target_table_prefix}__text_file"] = OrderedDict(df.to_dict(orient='list'))
480-
else:
481-
self.text_file_data["text_file"] = OrderedDict(df.to_dict(orient='list'))
482-
self.set_schema_2(self.text_file_data)
418+
if self.table_name is None:
419+
self.table_name = "Wildfire"
420+
421+
total_df = DataFrame()
422+
for filename in self.filenames:
423+
temp_df = read_csv(filename)
424+
try:
425+
total_df = concat([total_df, temp_df], axis=0, ignore_index=True)
426+
except:
427+
return (ValueError, f"Error in adding {filename} to the existing wildfire data. Please recheck column names and data structure")
428+
429+
if self.sim_table:
430+
total_df['sim_id'] = range(1, len(total_df) + 1)
431+
total_df = total_df[['sim_id'] + [col for col in total_df.columns if col != 'sim_id']]
432+
433+
total_data = OrderedDict(total_df.to_dict(orient='list'))
434+
for col, coldata in total_data.items(): # replace NaNs with None
435+
total_data[col] = [None if type(item) == float and isnan(item) else item for item in coldata]
436+
437+
self.csv_data[self.table_name] = total_data
438+
439+
if self.sim_table:
440+
sim_list = list(range(1, len(total_df) + 1))
441+
sim_dict = OrderedDict([('sim_id', sim_list)])
442+
self.csv_data["simulation"] = sim_dict
443+
444+
relation_dict = OrderedDict([('primary_key', []), ('foreign_key', [])])
445+
relation_dict["primary_key"].append(("simulation", "sim_id"))
446+
relation_dict["foreign_key"].append((self.table_name, "sim_id"))
447+
self.csv_data["dsi_relations"] = relation_dict
448+
449+
self.set_schema_2(self.csv_data)
483450

484451
class MetadataReader1(FileReader):
485452
"""

dsi/plugins/file_writer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def __init__(self, table_name, filename, export_cols = None, **kwargs):
139139
140140
`export_cols`: default None. When specified, this must be a list of column names to keep in output csv file
141141
142-
- Ex: all columns are ["a", "b", "c", "d", "e"]. export_cols = ["a", "c", "e"]
142+
- Ex: all columns are [a, b, c, d, e]. export_cols = [a, c, e]
143143
"""
144144
super().__init__(filename, **kwargs)
145145
self.csv_file_name = filename

examples/data/test.txt

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Name,Age,Location
2+
Alice,25,New York
3+
Bob,30,Dallas
4+
Charlie,22,Chicago
5+
David,28,Miami
6+
Eve,35,Boston

0 commit comments

Comments
 (0)