8
8
import logging
9
9
from datetime import datetime
10
10
import warnings
11
+ import sys
11
12
12
13
from dsi .backends .filesystem import Filesystem
13
14
from dsi .backends .sqlite import Sqlite , DataType , Artifact
@@ -158,15 +159,17 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs):
158
159
self .logger .info (" Activating this reader in load_module" )
159
160
160
161
try :
162
+ sys .settrace (self .trace_function ) # starts a short trace to get line number where plugin reader returned
161
163
ingest_error = obj .add_rows ()
162
164
if ingest_error is not None :
163
165
if self .debug_level != 0 :
164
166
self .logger .error (f" { ingest_error [1 ]} " )
165
- raise ingest_error [0 ](ingest_error [1 ])
167
+ raise ingest_error [0 ](f"Caught error in { original_file } @ line { return_line_number } : " + ingest_error [1 ])
168
+ sys .settrace (None ) # ends trace to prevent large overhead
166
169
except :
167
170
if self .debug_level != 0 :
168
- self .logger .error (f' { mod_name } plugin reader add_rows() was incorrect . Check to ensure data was stored correctly' )
169
- raise RuntimeError (f'{ mod_name } plugin reader add_rows() was incorrect . Check to ensure data was stored correctly' )
171
+ self .logger .error (f' Data structure error in add_rows() of { mod_name } plugin . Check to ensure data was stored correctly' )
172
+ raise RuntimeError (f'Data structure error in add_rows() of { mod_name } plugin . Check to ensure data was stored correctly' )
170
173
171
174
for table_name , table_metadata in obj .output_collector .items ():
172
175
if "hostname" in table_name .lower ():
@@ -273,7 +276,7 @@ def add_external_python_module(self, mod_type, mod_name, mod_path):
273
276
274
277
Note: mod_type is needed because each Python module only implements plugins or backends.
275
278
276
- Check Examples section to see how to use this function.
279
+ Check Example 7 in Core: Examples on GitHub Docs to see how to use this function.
277
280
"""
278
281
mod = SourceFileLoader (mod_name , mod_path ).load_module ()
279
282
self .module_collection [mod_type ][mod_name ] = mod
@@ -301,18 +304,20 @@ def transload(self, **kwargs):
301
304
self .logger .info (f"Transloading { obj .__class__ .__name__ } { 'writer' } " )
302
305
start = datetime .now ()
303
306
try :
307
+ sys .settrace (self .trace_function ) # starts a short trace to get line number where writer plugin returned
304
308
writer_error = obj .get_rows (self .active_metadata , ** kwargs )
305
309
if writer_error is not None :
306
310
if writer_error [0 ] == "Warning" :
307
311
warnings .warn (writer_error [1 ])
308
312
else :
309
313
if self .debug_level != 0 :
310
314
self .logger .error (writer_error [1 ])
311
- raise writer_error [0 ](writer_error [1 ])
315
+ raise writer_error [0 ](f"Caught error in { original_file } @ line { return_line_number } : " + writer_error [1 ])
316
+ sys .settrace (None ) # ends trace to prevent large overhead
312
317
except :
313
- if self .debug_level != 0 :
314
- self .logger .error (f' { obj .__class__ .__name__ } plugin writer add_rows() was incorrect . Check to ensure data was stored correctly' )
315
- raise RuntimeError (f'{ obj .__class__ .__name__ } plugin writer add_rows() was incorrect . Check to ensure data was stored correctly' )
318
+ if self .debug_level != 0 :
319
+ self .logger .error (f' Data structure error in get_rows() of { obj .__class__ .__name__ } plugin. Check to ensure data was handled correctly' )
320
+ raise RuntimeError (f'Data structure error in get_rows() of { obj .__class__ .__name__ } plugin. Check to ensure data was handled correctly' )
316
321
used_writers .append (obj )
317
322
end = datetime .now ()
318
323
self .logger .info (f"Runtime: { end - start } " )
@@ -379,7 +384,7 @@ def artifact_handler(self, interaction_type, query = None, **kwargs):
379
384
380
385
operation_success = False
381
386
backread_active = False
382
- if interaction_type in ['put ' , 'ingest ' ]:
387
+ if interaction_type in ['ingest ' , 'put ' ]:
383
388
for obj in self .active_modules ['back-write' ]:
384
389
if self .debug_level != 0 :
385
390
self .logger .info ("-------------------------------------" )
@@ -397,47 +402,55 @@ def artifact_handler(self, interaction_type, query = None, **kwargs):
397
402
if self .debug_level != 0 :
398
403
self .logger .info (f" Backup file runtime: { backup_end - backup_start } " )
399
404
405
+ sys .settrace (self .trace_function ) # starts a short trace to get line number where ingest_artifacts() returned
400
406
if interaction_type == "ingest" :
401
407
errorMessage = obj .ingest_artifacts (collection = self .active_metadata , ** kwargs )
402
408
elif interaction_type == "put" :
403
409
errorMessage = obj .put_artifacts (collection = self .active_metadata , ** kwargs )
404
410
if errorMessage is not None :
405
411
if self .debug_level != 0 :
406
- self .logger .error (f"Error inserting data to the { obj .__class__ .__name__ } backend: { errorMessage [1 ]} " )
407
- raise errorMessage [0 ](f"Error inserting data to the { obj .__class__ .__name__ } backend: { errorMessage [1 ]} " )
412
+ self .logger .error (f"Error ingesting data in { original_file } @ line { return_line_number } due to { errorMessage [1 ]} " )
413
+ raise errorMessage [0 ](f"Error ingesting data in { original_file } @ line { return_line_number } due to { errorMessage [1 ]} " )
414
+ sys .settrace (None ) # ends trace to prevent large overhead
408
415
operation_success = True
409
416
end = datetime .now ()
410
417
self .logger .info (f"Runtime: { end - start } " )
411
- if interaction_type in ['put ' , 'ingest ' ] and len (self .active_modules ['back-read' ]) > 0 :
418
+ if interaction_type in ['ingest ' , 'put ' ] and len (self .active_modules ['back-read' ]) > 0 :
412
419
backread_active = True
413
420
414
- get_artifact_data = None
421
+ query_data = None
415
422
first_backend = self .loaded_backends [0 ]
416
- if interaction_type not in ['put ' , 'ingest ' , "read " , "process " ] and self .debug_level != 0 :
423
+ if interaction_type not in ['ingest ' , 'put ' , "processs " , "read " ] and self .debug_level != 0 :
417
424
self .logger .info ("-------------------------------------" )
418
425
self .logger .info (f"{ first_backend .__class__ .__name__ } backend - { interaction_type .upper ()} the data" )
419
426
start = datetime .now ()
420
- if interaction_type in ['get ' , 'query ' ]:
421
- if "query" in first_backend .get_artifacts .__code__ .co_varnames : #need to change this to query_artifacts eventually
427
+ if interaction_type in ['query ' , 'get ' ]:
428
+ if "query" in first_backend .query_artifacts .__code__ .co_varnames :
422
429
self .logger .info (f"Query to get data: { query } " )
423
430
kwargs ['query' ] = query
431
+
432
+ sys .settrace (self .trace_function ) # starts a short trace to get line number where query_artifacts() returned
424
433
if interaction_type == "get" :
425
- get_artifact_data = first_backend .get_artifacts (** kwargs )
434
+ query_data = first_backend .get_artifacts (** kwargs )
426
435
elif interaction_type == "query" :
427
- get_artifact_data = first_backend .query_artifacts (** kwargs )
428
- if isinstance (get_artifact_data , tuple ):
436
+ query_data = first_backend .query_artifacts (** kwargs )
437
+ if isinstance (query_data , tuple ):
429
438
if self .debug_level != 0 :
430
- self .logger .error (get_artifact_data [1 ])
431
- raise get_artifact_data [0 ](get_artifact_data [1 ])
439
+ self .logger .error (query_data [1 ])
440
+ raise query_data [0 ](f"Caught error in { original_file } @ line { return_line_number } : " + query_data [1 ])
441
+ sys .settrace (None ) # ends trace to prevent large overhead
432
442
operation_success = True
433
443
434
- elif interaction_type in ['inspect ' , 'notebook ' ]:
444
+ elif interaction_type in ['notebook ' , 'inspect ' ]:
435
445
parent_class = first_backend .__class__ .__bases__ [0 ].__name__
436
446
if parent_class == "Filesystem" and os .path .getsize (first_backend .filename ) > 100 :
437
- if interaction_type == "inspect" :
438
- first_backend .inspect_artifacts (** kwargs )
439
- elif interaction_type == "notebook" :
440
- first_backend .notebook (** kwargs )
447
+ try :
448
+ if interaction_type == "inspect" :
449
+ first_backend .inspect_artifacts (** kwargs )
450
+ elif interaction_type == "notebook" :
451
+ first_backend .notebook (** kwargs )
452
+ except :
453
+ raise ValueError ("Error in generating notebook. Please ensure data in the actual backend is stable" )
441
454
elif parent_class == "Connection" : # NEED ANOTHER CHECKER TO SEE IF BACKEND IS NOT EMPTY WHEN BACKEND IS NOT A FILESYSTEM
442
455
pass
443
456
else : #backend is empty - cannot inspect
@@ -446,7 +459,7 @@ def artifact_handler(self, interaction_type, query = None, **kwargs):
446
459
raise ValueError ("Error in notebook/inspect artifact handler: Need to ingest data into a backend before generating Jupyter notebook" )
447
460
operation_success = True
448
461
449
- elif interaction_type in ["read " , "process " ] and len (self .active_modules ['back-read' ]) > 0 :
462
+ elif interaction_type in ["process " , "read " ] and len (self .active_modules ['back-read' ]) > 0 :
450
463
first_backread = self .active_modules ['back-read' ][0 ]
451
464
if self .debug_level != 0 :
452
465
self .logger .info (f"{ first_backread .__class__ .__name__ } backend - { interaction_type .upper ()} the data" )
@@ -455,25 +468,34 @@ def artifact_handler(self, interaction_type, query = None, **kwargs):
455
468
elif interaction_type == "read" :
456
469
self .active_metadata = first_backread .read_to_artifact ()
457
470
operation_success = True
458
- elif interaction_type in ["read " , "process " ] and len (self .active_modules ['back-read' ]) == 0 :
471
+ elif interaction_type in ["process " , "read " ] and len (self .active_modules ['back-read' ]) == 0 :
459
472
backread_active = True
460
473
461
474
if operation_success :
462
475
end = datetime .now ()
463
476
if self .debug_level != 0 :
464
477
self .logger .info (f"Runtime: { end - start } " )
465
- if interaction_type in ['get ' , 'query ' ] and get_artifact_data is not None :
466
- return get_artifact_data
478
+ if interaction_type in ['query ' , 'get ' ] and query_data is not None :
479
+ return query_data
467
480
else :
468
481
not_run_msg = None
469
482
if backread_active :
470
- not_run_msg = 'Remember that back-WRITE backends cannot read/ process data and back-READ backends cannot put/ ingest'
483
+ not_run_msg = 'Remember that back-WRITE backends cannot process/read data and back-READ backends cannot ingest/put '
471
484
else :
472
- not_run_msg = 'Is your artifact interaction implemented in your backend?'
485
+ not_run_msg = 'Is your artifact interaction implemented in your specified backend?'
473
486
if self .debug_level != 0 :
474
487
self .logger .error (not_run_msg )
475
488
raise NotImplementedError (not_run_msg )
476
489
490
+ # Internal function used to get line numbers from return statements - should not be called by users
491
def trace_function(self, frame, event, arg):
    """
    Trace callback that remembers where the most recent traced frame returned.

    This method is handed to ``sys.settrace`` elsewhere in this class. Each
    time it is invoked with a ``"return"`` event it stores the frame's
    current line number and source file name in the module-level globals
    ``return_line_number`` and ``original_file``; every other event leaves
    those globals untouched.

    `frame` : frame object supplied by the interpreter's trace machinery.

    `event` : str, name of the trace event (only ``"return"`` is acted on).

    `arg` : event-specific payload; accepted to match the trace-callback
    signature but not used.

    `return` : ``self.trace_function``, so the same callback keeps being
    used for subsequent events.

    Internal helper - should not be called by users.
    """
    global return_line_number, original_file

    # Anything other than a function return carries no information we keep.
    if event != "return":
        return self.trace_function

    return_line_number = frame.f_lineno  # line on which the frame returned
    original_file = frame.f_code.co_filename  # file that frame's code lives in
    return self.trace_function
498
+
477
499
def find (self , query_object ):
478
500
"""
479
501
Find all function that searches for all instances of 'query_object' in first loaded backend. Searches among all tables/column/cells
0 commit comments