Update inline code documentation and remove TextFile as a DSI reader

Vedant1 · Vedant1 · commit 77d8632bb43b · 2025-03-07T00:24:11.000-07:00
diff --git a/dsi/backends/sqlite.py b/dsi/backends/sqlite.py
@@ -72,6 +72,10 @@ def check_type(self, input_list):
                 return " VARCHAR"
         return ""
     
+    # OLD NAME OF ingest_table_helper. TO BE DEPRECATED IN FUTURE DSI RELEASE
+    def put_artifact_type(self, types, foreign_query = None, isVerbose=False):
+        self.ingest_table_helper(types, foreign_query, isVerbose)
+        
     def ingest_table_helper(self, types, foreign_query = None, isVerbose=False):
         """
         Helper function to create SQLite table based on the passed in schema. Used within ingest_artifact()
@@ -113,13 +117,9 @@ def ingest_table_helper(self, types, foreign_query = None, isVerbose=False):
             self.cur.execute(str_query)
             self.types = types
 
-    # OLD NAME OF ingest_table_helper. TO BE DEPRECATED IN FUTURE DSI RELEASE
-    def put_artifact_type(self, types, foreign_query = None, isVerbose=False):
-        self.ingest_table_helper(self, types, foreign_query, isVerbose)
-
     # OLD NAME OF ingest_artifacts(). TO BE DEPRECATED IN FUTURE DSI RELEASE
     def put_artifacts(self, collection, isVerbose=False):
-        return self.ingest_artifacts(self, collection, isVerbose)
+        return self.ingest_artifacts(collection, isVerbose)
     
     def ingest_artifacts(self, collection, isVerbose=False):    
         """
@@ -681,6 +681,8 @@ def close(self):
     # OLD FUNCTION TO DEPRECATE
     def put_artifacts_t(self, collection, tableName="TABLENAME", isVerbose=False):
         """
+        DSI 1.0 FUNCTIONALITY - DEPRECATING SOON, DO NOT USE
+        
         Primary class for insertion of collection of Artifacts metadata into a defined schema, with a table passthrough
 
         `collection`: A Python Collection of an Artifact derived class that has multiple regular structures of a defined schema,
diff --git a/dsi/core.py b/dsi/core.py
@@ -26,7 +26,7 @@ class Terminal():
     PLUGIN_PREFIX = ['dsi.plugins']
     PLUGIN_IMPLEMENTATIONS = ['env', 'file_reader', 'file_writer']
     VALID_ENV = ['Hostname', 'SystemKernel', 'GitInfo']
-    VALID_READERS = ['Bueno', 'Csv', 'YAML1', 'TOML1', 'Schema', 'TextFile', 'MetadataReader1', 'Wildfire']
+    VALID_READERS = ['Bueno', 'Csv', 'YAML1', 'TOML1', 'Schema', 'MetadataReader1', 'Wildfire']
     VALID_WRITERS = ['ER_Diagram', 'Table_Plot', 'Csv_Writer']
     VALID_PLUGINS = VALID_ENV + VALID_READERS + VALID_WRITERS
     VALID_BACKENDS = ['Gufi', 'Sqlite', 'Parquet']
@@ -41,14 +41,13 @@ def __init__(self, debug = 0, backup_db = False, runTable = False):
 
         Optional flags can be set and defined:
 
-        `debug`: {0: off, 1: user debug log, 2: user + developer debug log}. 
-                      When set to 1 or 2, debug info will write to a local debug.log text file with various benchmarks.
-
+        `debug`: {0: off, 1: user debug log, 2: user + developer debug log} 
+            
+            - When set to 1 or 2, debug info is written to a local debug.log text file with various benchmarks.
         `backup_db`: Undefined False as default. If set to True, this creates a backup database before committing new changes.
 
         `runTable`: Undefined False as default. 
-                          When new metadata is ingested, a 'runTable' is created, appended, and timestamped when database in incremented. 
-                          Recommended for in-situ use-cases.
+        When new metadata is ingested, a 'runTable' is created, appended, and timestamped when the database is incremented. Recommended for in-situ use-cases.
         """
         def static_munge(prefix, implementations):
             return (['.'.join(i) for i in product(prefix, implementations)])
@@ -278,6 +277,7 @@ def add_external_python_module(self, mod_type, mod_name, mod_path):
         """
         mod = SourceFileLoader(mod_name, mod_path).load_module()
         self.module_collection[mod_type][mod_name] = mod
+        self.VALID_MODULES.append(mod_name)
 
     def list_loaded_modules(self):
         """
diff --git a/dsi/plugins/file_reader.py b/dsi/plugins/file_reader.py
@@ -39,7 +39,7 @@ class Csv(FileReader):
     """
     def __init__(self, filenames, table_name = None, **kwargs):
         """
-        Initializes CSV Reader with user specified parameters. INCLUDE NECESSARY PARAMETERS WHEN LOADING THIS PLUGIN WITH CORE.LOAD_MODULE()
+        Initializes CSV Reader with user specified parameters.
 
         `filenames`: Required input. List of CSV files, or just one CSV files to store in DSI. If a list, data in all files must be for the same table
 
@@ -75,74 +75,6 @@ def add_rows(self) -> None:
         
         self.set_schema_2(self.csv_data)
 
-class Wildfire(FileReader):
-    """
-    A Structured Data Plugin to ingest Wildfire data stored as a CSV
-
-    Can be used for other cases if data is post-processed and running only once.
-
-    Can create a manual simulation table
-    """
-    def __init__(self, filenames, table_name = None, sim_table = False, **kwargs):
-        """
-        Initializes Wildfire Reader with user specified parameters.
-
-        `filenames`: Required input -- Wildfire data files
-
-        `table_name`: default None. User can specify table name when loading the wildfire file.   
-
-        `sim_table`: default False. Set to True if creating manual simulation table where each row of Wildfire file is a separate sim
-
-            - also creates new column in wildfire data for each row to associate to a corresponding row/simulation in sim_table
-        """
-        super().__init__(filenames, **kwargs)
-        self.csv_data = OrderedDict()
-        if isinstance(filenames, str):
-            self.filenames = [filenames]
-        else:
-            self.filenames = filenames
-        self.table_name = table_name
-        self.sim_table = sim_table
-
-    def add_rows(self) -> None:
-        """ 
-        Creates Ordered Dictionary for the wildfire data. 
-
-        If sim_table = True, a sim_table Ordered Dict also created, and both are nested within a larger Ordered Dict.
-        """
-        if self.table_name is None:
-            self.table_name = "Wildfire"
-
-        total_df = DataFrame()
-        for filename in self.filenames:
-            temp_df = read_csv(filename)
-            try:
-                total_df = concat([total_df, temp_df], axis=0, ignore_index=True)
-            except:
-                return (ValueError, f"Error in adding {filename} to the existing wildfire data. Please recheck column names and data structure")
-        
-        if self.sim_table:
-            total_df['sim_id'] = range(1, len(total_df) + 1)
-            total_df = total_df[['sim_id'] + [col for col in total_df.columns if col != 'sim_id']]
-
-        total_data = OrderedDict(total_df.to_dict(orient='list'))
-        for col, coldata in total_data.items():  # replace NaNs with None
-            total_data[col] = [None if type(item) == float and isnan(item) else item for item in coldata]
-        
-        self.csv_data[self.table_name] = total_data
-        
-        if self.sim_table:
-            sim_list = list(range(1, len(total_df) + 1))
-            sim_dict = OrderedDict([('sim_id', sim_list)])
-            self.csv_data["simulation"] = sim_dict
-
-            relation_dict = OrderedDict([('primary_key', []), ('foreign_key', [])])
-            relation_dict["primary_key"].append(("simulation", "sim_id"))
-            relation_dict["foreign_key"].append((self.table_name, "sim_id"))
-            self.csv_data["dsi_relations"] = relation_dict
-       
-        self.set_schema_2(self.csv_data)
-
 class Bueno(FileReader):
     """
     A Structured Data Plugin to capture performance data from Bueno (github.com/lanl/bueno)
@@ -449,37 +381,72 @@ def add_rows(self) -> None:
             del self.toml_data["dsi_units"]
         self.set_schema_2(self.toml_data)
 
-class TextFile(FileReader):
+class Wildfire(FileReader):
     """
-    Structured Data Plugin to read in an individual or a set of text files
+    A Structured Data Plugin to ingest Wildfire data stored as a CSV
 
-    Table names are the keys for the main ordered dictionary and column names are the keys for each table's nested ordered dictionary
+    Can be used for other cases if data is post-processed and running only once.
+    Can create a manual simulation table
     """
-    def __init__(self, filenames, target_table_prefix = None, **kwargs):
+    def __init__(self, filenames, table_name = None, sim_table = False, **kwargs):
         """
-        `filenames`: one text file or a list of text files to be ingested
+        Initializes Wildfire Reader with user specified parameters.
+
+        `filenames`: Required input -- Wildfire data files
+
+        `table_name`: default None. User can specify table name when loading the wildfire file.   
 
-        `target_table_prefix`: prefix to be added to every table created to differentiate between other text file sources
+        `sim_table`: default False. Set to True if creating manual simulation table where each row of Wildfire file is a separate sim
+
+            - also creates new column in wildfire data for each row to associate to a corresponding row/simulation in sim_table
         """
         super().__init__(filenames, **kwargs)
+        self.csv_data = OrderedDict()
         if isinstance(filenames, str):
-            self.text_files = [filenames]
+            self.filenames = [filenames]
         else:
-            self.text_files = filenames
-        self.text_file_data = OrderedDict()
-        self.target_table_prefix = target_table_prefix
+            self.filenames = filenames
+        self.table_name = table_name
+        self.sim_table = sim_table
 
     def add_rows(self) -> None:
+        """ 
+        Creates Ordered Dictionary for the wildfire data. 
+
+        If sim_table = True, a sim_table Ordered Dict is also created, and both are nested within a larger Ordered Dict.
         """
-        Parses text file data and creates an ordered dict whose keys are table names and values are an ordered dict for each table.
-        """
-        for filename in self.text_files:
-            df = read_csv(filename)
-            if self.target_table_prefix is not None:
-                self.text_file_data[f"{self.target_table_prefix}__text_file"] = OrderedDict(df.to_dict(orient='list'))
-            else:
-                self.text_file_data["text_file"] = OrderedDict(df.to_dict(orient='list'))
-            self.set_schema_2(self.text_file_data)
+        if self.table_name is None:
+            self.table_name = "Wildfire"
+
+        total_df = DataFrame()
+        for filename in self.filenames:
+            temp_df = read_csv(filename)
+            try:
+                total_df = concat([total_df, temp_df], axis=0, ignore_index=True)
+            except:
+                return (ValueError, f"Error in adding {filename} to the existing wildfire data. Please recheck column names and data structure")
+        
+        if self.sim_table:
+            total_df['sim_id'] = range(1, len(total_df) + 1)
+            total_df = total_df[['sim_id'] + [col for col in total_df.columns if col != 'sim_id']]
+
+        total_data = OrderedDict(total_df.to_dict(orient='list'))
+        for col, coldata in total_data.items():  # replace NaNs with None
+            total_data[col] = [None if type(item) == float and isnan(item) else item for item in coldata]
+        
+        self.csv_data[self.table_name] = total_data
+        
+        if self.sim_table:
+            sim_list = list(range(1, len(total_df) + 1))
+            sim_dict = OrderedDict([('sim_id', sim_list)])
+            self.csv_data["simulation"] = sim_dict
+
+            relation_dict = OrderedDict([('primary_key', []), ('foreign_key', [])])
+            relation_dict["primary_key"].append(("simulation", "sim_id"))
+            relation_dict["foreign_key"].append((self.table_name, "sim_id"))
+            self.csv_data["dsi_relations"] = relation_dict
+       
+        self.set_schema_2(self.csv_data)
 
 class MetadataReader1(FileReader):
     """
diff --git a/dsi/plugins/file_writer.py b/dsi/plugins/file_writer.py
@@ -139,7 +139,7 @@ def __init__(self, table_name, filename, export_cols = None, **kwargs):
 
         `export_cols`: default None. When specified, this must be a list of column names to keep in output csv file
 
-            - Ex: all columns are ["a", "b", "c", "d", "e"]. export_cols = ["a", "c", "e"]
+            - Ex: all columns are [a, b, c, d, e]. export_cols = [a, c, e]
         """
         super().__init__(filename, **kwargs)
         self.csv_file_name = filename
diff --git a/examples/data/test.txt b/examples/data/test.txt
@@ -0,0 +1,6 @@
+Name,Age,Location
+Alice,25,New York
+Bob,30,Dallas
+Charlie,22,Chicago
+David,28,Miami
+Eve,35,Boston