Skip to content

Commit c2e89a2

Browse files
committed
error messages include line numbers in core and other minor changes
1 parent 2e86c98 commit c2e89a2

File tree

2 files changed

+58
-36
lines changed

2 files changed

+58
-36
lines changed

dsi/backends/sqlite.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class ValueObject:
4242
# Main storage class, interfaces with SQL
4343
class Sqlite(Filesystem):
4444
"""
45-
SQLite Filesystem Backend to which a user can ingest/read data, generate a Jupyter notebook, and find all occurences of a search term
45+
SQLite Filesystem Backend to which a user can ingest/process data, generate a Jupyter notebook, and find all occurrences of a search term
4646
"""
4747
runTable = False
4848

@@ -246,15 +246,15 @@ def query_artifacts(self, query, isVerbose=False, dict_return = False):
246246
if isVerbose:
247247
print(data)
248248
except:
249-
return (ValueError, "Error in get_artifacts/process_artifacts handler: Incorrect SELECT query on the data. Please try again")
249+
return (ValueError, "Error in query_artifacts/get_artifacts handler: Incorrect SELECT query on the data. Please try again")
250250
else:
251-
return (ValueError, "Error in get_artifacts/process_artifacts handler: Can only run SELECT or PRAGMA queries on the data")
251+
return (ValueError, "Error in query_artifacts/get_artifacts handler: Can only run SELECT or PRAGMA queries on the data")
252252

253253
if dict_return:
254254
query_cols = [description[0] for description in self.cur.description]
255255
tables = re.findall(r'FROM\s+(\w+)|JOIN\s+(\w+)', query, re.IGNORECASE)
256256
if len(tables) > 1:
257-
return (ValueError, "Error in get_artifacts/process_artifacts handler: Can only return ordered dictionary if query with one table")
257+
return (ValueError, "Error in query_artifacts/get_artifacts handler: Can only return ordered dictionary if query with one table")
258258

259259
queryDict = OrderedDict()
260260
for row in data:

dsi/core.py

+54-32
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import logging
99
from datetime import datetime
1010
import warnings
11+
import sys
1112

1213
from dsi.backends.filesystem import Filesystem
1314
from dsi.backends.sqlite import Sqlite, DataType, Artifact
@@ -158,15 +159,17 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs):
158159
self.logger.info(" Activating this reader in load_module")
159160

160161
try:
162+
sys.settrace(self.trace_function) # starts a short trace to get line number where plugin reader returned
161163
ingest_error = obj.add_rows()
162164
if ingest_error is not None:
163165
if self.debug_level != 0:
164166
self.logger.error(f" {ingest_error[1]}")
165-
raise ingest_error[0](ingest_error[1])
167+
raise ingest_error[0](f"Caught error in {original_file} @ line {return_line_number}: " + ingest_error[1])
168+
sys.settrace(None) # ends trace to prevent large overhead
166169
except:
167170
if self.debug_level != 0:
168-
self.logger.error(f' {mod_name} plugin reader add_rows() was incorrect. Check to ensure data was stored correctly')
169-
raise RuntimeError(f'{mod_name} plugin reader add_rows() was incorrect. Check to ensure data was stored correctly')
171+
self.logger.error(f' Data structure error in add_rows() of {mod_name} plugin. Check to ensure data was stored correctly')
172+
raise RuntimeError(f'Data structure error in add_rows() of {mod_name} plugin. Check to ensure data was stored correctly')
170173

171174
for table_name, table_metadata in obj.output_collector.items():
172175
if "hostname" in table_name.lower():
@@ -273,7 +276,7 @@ def add_external_python_module(self, mod_type, mod_name, mod_path):
273276
274277
Note: mod_type is needed because each Python module only implements plugins or backends.
275278
276-
Check Examples section to see how to use this function.
279+
Check Example 7 in Core:Examples on GitHub Docs to see how to use this function.
277280
"""
278281
mod = SourceFileLoader(mod_name, mod_path).load_module()
279282
self.module_collection[mod_type][mod_name] = mod
@@ -301,18 +304,20 @@ def transload(self, **kwargs):
301304
self.logger.info(f"Transloading {obj.__class__.__name__} {'writer'}")
302305
start = datetime.now()
303306
try:
307+
sys.settrace(self.trace_function) # starts a short trace to get line number where writer plugin returned
304308
writer_error = obj.get_rows(self.active_metadata, **kwargs)
305309
if writer_error is not None:
306310
if writer_error[0] == "Warning":
307311
warnings.warn(writer_error[1])
308312
else:
309313
if self.debug_level != 0:
310314
self.logger.error(writer_error[1])
311-
raise writer_error[0](writer_error[1])
315+
raise writer_error[0](f"Caught error in {original_file} @ line {return_line_number}: " + writer_error[1])
316+
sys.settrace(None) # ends trace to prevent large overhead
312317
except:
313-
if self.debug_level != 0:
314-
self.logger.error(f' {obj.__class__.__name__} plugin writer add_rows() was incorrect. Check to ensure data was stored correctly')
315-
raise RuntimeError(f'{obj.__class__.__name__} plugin writer add_rows() was incorrect. Check to ensure data was stored correctly')
318+
if self.debug_level != 0:
319+
self.logger.error(f' Data structure error in get_rows() of {obj.__class__.__name__} plugin. Check to ensure data was handled correctly')
320+
raise RuntimeError(f'Data structure error in get_rows() of {obj.__class__.__name__} plugin. Check to ensure data was handled correctly')
316321
used_writers.append(obj)
317322
end = datetime.now()
318323
self.logger.info(f"Runtime: {end-start}")
@@ -379,7 +384,7 @@ def artifact_handler(self, interaction_type, query = None, **kwargs):
379384

380385
operation_success = False
381386
backread_active = False
382-
if interaction_type in ['put', 'ingest']:
387+
if interaction_type in ['ingest', 'put']:
383388
for obj in self.active_modules['back-write']:
384389
if self.debug_level != 0:
385390
self.logger.info("-------------------------------------")
@@ -397,47 +402,55 @@ def artifact_handler(self, interaction_type, query = None, **kwargs):
397402
if self.debug_level != 0:
398403
self.logger.info(f" Backup file runtime: {backup_end-backup_start}")
399404

405+
sys.settrace(self.trace_function) # starts a short trace to get line number where ingest_artifacts() returned
400406
if interaction_type == "ingest":
401407
errorMessage = obj.ingest_artifacts(collection = self.active_metadata, **kwargs)
402408
elif interaction_type == "put":
403409
errorMessage = obj.put_artifacts(collection = self.active_metadata, **kwargs)
404410
if errorMessage is not None:
405411
if self.debug_level != 0:
406-
self.logger.error(f"Error inserting data to the {obj.__class__.__name__} backend: {errorMessage[1]}")
407-
raise errorMessage[0](f"Error inserting data to the {obj.__class__.__name__} backend: {errorMessage[1]}")
412+
self.logger.error(f"Error ingesting data in {original_file} @ line {return_line_number} due to {errorMessage[1]}")
413+
raise errorMessage[0](f"Error ingesting data in {original_file} @ line {return_line_number} due to {errorMessage[1]}")
414+
sys.settrace(None) # ends trace to prevent large overhead
408415
operation_success = True
409416
end = datetime.now()
410417
self.logger.info(f"Runtime: {end-start}")
411-
if interaction_type in ['put', 'ingest'] and len(self.active_modules['back-read']) > 0:
418+
if interaction_type in ['ingest', 'put'] and len(self.active_modules['back-read']) > 0:
412419
backread_active = True
413420

414-
get_artifact_data = None
421+
query_data = None
415422
first_backend = self.loaded_backends[0]
416-
if interaction_type not in ['put', 'ingest', "read", "process"] and self.debug_level != 0:
423+
if interaction_type not in ['ingest', 'put', "processs", "read"] and self.debug_level != 0:
417424
self.logger.info("-------------------------------------")
418425
self.logger.info(f"{first_backend.__class__.__name__} backend - {interaction_type.upper()} the data")
419426
start = datetime.now()
420-
if interaction_type in ['get', 'query']:
421-
if "query" in first_backend.get_artifacts.__code__.co_varnames: #need to change this to query_artifacts eventually
427+
if interaction_type in ['query', 'get']:
428+
if "query" in first_backend.query_artifacts.__code__.co_varnames:
422429
self.logger.info(f"Query to get data: {query}")
423430
kwargs['query'] = query
431+
432+
sys.settrace(self.trace_function) # starts a short trace to get line number where query_artifacts() returned
424433
if interaction_type == "get":
425-
get_artifact_data = first_backend.get_artifacts(**kwargs)
434+
query_data = first_backend.get_artifacts(**kwargs)
426435
elif interaction_type == "query":
427-
get_artifact_data = first_backend.query_artifacts(**kwargs)
428-
if isinstance(get_artifact_data, tuple):
436+
query_data = first_backend.query_artifacts(**kwargs)
437+
if isinstance(query_data, tuple):
429438
if self.debug_level != 0:
430-
self.logger.error(get_artifact_data[1])
431-
raise get_artifact_data[0](get_artifact_data[1])
439+
self.logger.error(query_data[1])
440+
raise query_data[0](f"Caught error in {original_file} @ line {return_line_number}: " + query_data[1])
441+
sys.settrace(None) # ends trace to prevent large overhead
432442
operation_success = True
433443

434-
elif interaction_type in ['inspect', 'notebook']:
444+
elif interaction_type in ['notebook', 'inspect']:
435445
parent_class = first_backend.__class__.__bases__[0].__name__
436446
if parent_class == "Filesystem" and os.path.getsize(first_backend.filename) > 100:
437-
if interaction_type == "inspect":
438-
first_backend.inspect_artifacts(**kwargs)
439-
elif interaction_type == "notebook":
440-
first_backend.notebook(**kwargs)
447+
try:
448+
if interaction_type == "inspect":
449+
first_backend.inspect_artifacts(**kwargs)
450+
elif interaction_type == "notebook":
451+
first_backend.notebook(**kwargs)
452+
except:
453+
raise ValueError("Error in generating notebook. Please ensure data in the actual backend is stable")
441454
elif parent_class == "Connection": # NEED ANOTHER CHECKER TO SEE IF BACKEND IS NOT EMPTY WHEN BACKEND IS NOT A FILESYSTEM
442455
pass
443456
else: #backend is empty - cannot inspect
@@ -446,7 +459,7 @@ def artifact_handler(self, interaction_type, query = None, **kwargs):
446459
raise ValueError("Error in notebook/inspect artifact handler: Need to ingest data into a backend before generating Jupyter notebook")
447460
operation_success = True
448461

449-
elif interaction_type in ["read", "process"] and len(self.active_modules['back-read']) > 0:
462+
elif interaction_type in ["process", "read"] and len(self.active_modules['back-read']) > 0:
450463
first_backread = self.active_modules['back-read'][0]
451464
if self.debug_level != 0:
452465
self.logger.info(f"{first_backread.__class__.__name__} backend - {interaction_type.upper()} the data")
@@ -455,25 +468,34 @@ def artifact_handler(self, interaction_type, query = None, **kwargs):
455468
elif interaction_type == "read":
456469
self.active_metadata = first_backread.read_to_artifact()
457470
operation_success = True
458-
elif interaction_type in ["read", "process"] and len(self.active_modules['back-read']) == 0:
471+
elif interaction_type in ["process", "read"] and len(self.active_modules['back-read']) == 0:
459472
backread_active = True
460473

461474
if operation_success:
462475
end = datetime.now()
463476
if self.debug_level != 0:
464477
self.logger.info(f"Runtime: {end-start}")
465-
if interaction_type in ['get', 'query'] and get_artifact_data is not None:
466-
return get_artifact_data
478+
if interaction_type in ['query', 'get'] and query_data is not None:
479+
return query_data
467480
else:
468481
not_run_msg = None
469482
if backread_active:
470-
not_run_msg = 'Remember that back-WRITE backends cannot read/process data and back-READ backends cannot put/ingest'
483+
not_run_msg = 'Remember that back-WRITE backends cannot process/read data and back-READ backends cannot ingest/put'
471484
else:
472-
not_run_msg = 'Is your artifact interaction implemented in your backend?'
485+
not_run_msg = 'Is your artifact interaction implemented in your specified backend?'
473486
if self.debug_level != 0:
474487
self.logger.error(not_run_msg)
475488
raise NotImplementedError(not_run_msg)
476489

490+
# Internal function used to get line numbers from return statements - should not be called by users
491+
def trace_function(self, frame, event, arg):
492+
global return_line_number
493+
global original_file
494+
if event == "return":
495+
return_line_number = frame.f_lineno # Get line number
496+
original_file = frame.f_code.co_filename # Get file name
497+
return self.trace_function
498+
477499
def find(self, query_object):
478500
"""
479501
Searches for all instances of 'query_object' in the first loaded backend, across all tables, columns, and cells

0 commit comments

Comments
 (0)