@@ -39,7 +39,7 @@ class Csv(FileReader):
     """
     def __init__(self, filenames, table_name=None, **kwargs):
         """
-        Initializes CSV Reader with user specified parameters. INCLUDE NECESSARY PARAMETERS WHEN LOADING THIS PLUGIN WITH CORE.LOAD_MODULE()
+        Initializes CSV Reader with user specified parameters.

         `filenames`: Required input. List of CSV files, or just one CSV file to store in DSI. If a list, data in all files must be for the same table

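As the docstring above notes, `filenames` may be a single CSV path or a list of paths whose rows all belong to one table, and the plugin is normally loaded through DSI's core (the removed docstring text references CORE.LOAD_MODULE()). A minimal sketch of driving the reader directly under that reading; the import path and file names are assumptions, not part of this diff:

    # Hypothetical usage sketch; import path and file names are assumptions.
    from dsi.plugins.file_reader import Csv

    reader = Csv(filenames=["run1.csv", "run2.csv"], table_name="runs")
    reader.add_rows()   # builds reader.csv_data and hands it to set_schema_2()
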
@@ -75,74 +75,6 @@ def add_rows(self) -> None:

         self.set_schema_2(self.csv_data)

-class Wildfire(FileReader):
-    """
-    A Structured Data Plugin to ingest Wildfire data stored as a CSV
-
-    Can be used for other cases if data is post-processed and running only once.
-
-    Can create a manual simulation table
-    """
-    def __init__(self, filenames, table_name=None, sim_table=False, **kwargs):
-        """
-        Initializes Wildfire Reader with user specified parameters.
-
-        `filenames`: Required input -- Wildfire data files
-
-        `table_name`: default None. User can specify table name when loading the wildfire file.
-
-        `sim_table`: default False. Set to True if creating manual simulation table where each row of Wildfire file is a separate sim
-
-            - also creates new column in wildfire data for each row to associate to a corresponding row/simulation in sim_table
-        """
-        super().__init__(filenames, **kwargs)
-        self.csv_data = OrderedDict()
-        if isinstance(filenames, str):
-            self.filenames = [filenames]
-        else:
-            self.filenames = filenames
-        self.table_name = table_name
-        self.sim_table = sim_table
-
-    def add_rows(self) -> None:
-        """
-        Creates Ordered Dictionary for the wildfire data.
-
-        If sim_table = True, a sim_table Ordered Dict also created, and both are nested within a larger Ordered Dict.
-        """
-        if self.table_name is None:
-            self.table_name = "Wildfire"
-
-        total_df = DataFrame()
-        for filename in self.filenames:
-            temp_df = read_csv(filename)
-            try:
-                total_df = concat([total_df, temp_df], axis=0, ignore_index=True)
-            except:
-                return (ValueError, f"Error in adding {filename} to the existing wildfire data. Please recheck column names and data structure")
-
-        if self.sim_table:
-            total_df['sim_id'] = range(1, len(total_df) + 1)
-            total_df = total_df[['sim_id'] + [col for col in total_df.columns if col != 'sim_id']]
-
-        total_data = OrderedDict(total_df.to_dict(orient='list'))
-        for col, coldata in total_data.items():  # replace NaNs with None
-            total_data[col] = [None if type(item) == float and isnan(item) else item for item in coldata]
-
-        self.csv_data[self.table_name] = total_data
-
-        if self.sim_table:
-            sim_list = list(range(1, len(total_df) + 1))
-            sim_dict = OrderedDict([('sim_id', sim_list)])
-            self.csv_data["simulation"] = sim_dict
-
-            relation_dict = OrderedDict([('primary_key', []), ('foreign_key', [])])
-            relation_dict["primary_key"].append(("simulation", "sim_id"))
-            relation_dict["foreign_key"].append((self.table_name, "sim_id"))
-            self.csv_data["dsi_relations"] = relation_dict
-
-        self.set_schema_2(self.csv_data)
-
 class Bueno(FileReader):
     """
     A Structured Data Plugin to capture performance data from Bueno (github.com/lanl/bueno)
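For orientation while reading the Wildfire code being relocated in this diff: with sim_table=True, add_rows() nests three entries in self.csv_data: the wildfire table itself, a one-column "simulation" table, and a "dsi_relations" entry linking the two by sim_id. A rough sketch of the resulting structure for a hypothetical two-row input; the column names "wind_speed" and "burned_area" are invented for illustration:

    from collections import OrderedDict

    # Approximate shape of reader.csv_data after add_rows() with sim_table=True;
    # "wind_speed" and "burned_area" are hypothetical columns.
    OrderedDict([
        ("Wildfire", OrderedDict([
            ("sim_id",      [1, 2]),       # per-row key added to the wildfire data
            ("wind_speed",  [4.2, 7.8]),
            ("burned_area", [10.5, None])  # NaN values are replaced with None
        ])),
        ("simulation", OrderedDict([
            ("sim_id", [1, 2])             # one entry per row/simulation
        ])),
        ("dsi_relations", OrderedDict([
            ("primary_key", [("simulation", "sim_id")]),
            ("foreign_key", [("Wildfire", "sim_id")])
        ]))
    ])
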
@@ -449,37 +381,72 @@ def add_rows(self) -> None:
             del self.toml_data["dsi_units"]
         self.set_schema_2(self.toml_data)

-class TextFile(FileReader):
+class Wildfire(FileReader):
     """
-    Structured Data Plugin to read in an individual or a set of text files
+    A Structured Data Plugin to ingest Wildfire data stored as a CSV

-    Table names are the keys for the main ordered dictionary and column names are the keys for each table's nested ordered dictionary
+    Can be used for other cases if data is post-processed and running only once.
+    Can create a manual simulation table
     """
-    def __init__(self, filenames, target_table_prefix=None, **kwargs):
+    def __init__(self, filenames, table_name=None, sim_table=False, **kwargs):
         """
-        `filenames`: one text file or a list of text files to be ingested
+        Initializes Wildfire Reader with user specified parameters.
+
+        `filenames`: Required input -- Wildfire data files
+
+        `table_name`: default None. User can specify table name when loading the wildfire file.

-        `target_table_prefix`: prefix to be added to every table created to differentiate between other text file sources
+        `sim_table`: default False. Set to True if creating manual simulation table where each row of Wildfire file is a separate sim
+
+            - also creates new column in wildfire data for each row to associate to a corresponding row/simulation in sim_table
         """
         super().__init__(filenames, **kwargs)
+        self.csv_data = OrderedDict()
         if isinstance(filenames, str):
-            self.text_files = [filenames]
+            self.filenames = [filenames]
         else:
-            self.text_files = filenames
-        self.text_file_data = OrderedDict()
-        self.target_table_prefix = target_table_prefix
+            self.filenames = filenames
+        self.table_name = table_name
+        self.sim_table = sim_table

     def add_rows(self) -> None:
+        """
+        Creates Ordered Dictionary for the wildfire data.
+
+        If sim_table = True, a sim_table Ordered Dict also created, and both are nested within a larger Ordered Dict.
         """
-        Parses text file data and creates an ordered dict whose keys are table names and values are an ordered dict for each table.
-        """
-        for filename in self.text_files:
-            df = read_csv(filename)
-            if self.target_table_prefix is not None:
-                self.text_file_data[f"{self.target_table_prefix}__text_file"] = OrderedDict(df.to_dict(orient='list'))
-            else:
-                self.text_file_data["text_file"] = OrderedDict(df.to_dict(orient='list'))
-        self.set_schema_2(self.text_file_data)
+        if self.table_name is None:
+            self.table_name = "Wildfire"
+
+        total_df = DataFrame()
+        for filename in self.filenames:
+            temp_df = read_csv(filename)
+            try:
+                total_df = concat([total_df, temp_df], axis=0, ignore_index=True)
+            except:
+                return (ValueError, f"Error in adding {filename} to the existing wildfire data. Please recheck column names and data structure")
+
+        if self.sim_table:
+            total_df['sim_id'] = range(1, len(total_df) + 1)
+            total_df = total_df[['sim_id'] + [col for col in total_df.columns if col != 'sim_id']]
+
+        total_data = OrderedDict(total_df.to_dict(orient='list'))
+        for col, coldata in total_data.items():  # replace NaNs with None
+            total_data[col] = [None if type(item) == float and isnan(item) else item for item in coldata]
+
+        self.csv_data[self.table_name] = total_data
+
+        if self.sim_table:
+            sim_list = list(range(1, len(total_df) + 1))
+            sim_dict = OrderedDict([('sim_id', sim_list)])
+            self.csv_data["simulation"] = sim_dict
+
+            relation_dict = OrderedDict([('primary_key', []), ('foreign_key', [])])
+            relation_dict["primary_key"].append(("simulation", "sim_id"))
+            relation_dict["foreign_key"].append((self.table_name, "sim_id"))
+            self.csv_data["dsi_relations"] = relation_dict
+
+        self.set_schema_2(self.csv_data)

 class MetadataReader1(FileReader):
     """