Skip to content

Commit

Permalink
better asserts
Browse files (browse the repository at this point in the history)
  • Loading branch information
floptical committed Apr 26, 2024
1 parent c27b657 commit 04b09ce
Showing 1 changed file with 8 additions and 11 deletions.
19 changes: 8 additions & 11 deletions databridge_etl_tools/oracle/oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,12 +239,10 @@ def extract(self):
self.check_remove_nulls()

num_rows_in_csv = rows.nrows()
if num_rows_in_csv == 0:
raise AssertionError('Error! Dataset is empty? Line count of CSV is 0.')
assert num_rows_in_csv != 0, 'Error! Dataset is empty? Line count of CSV is 0.'

self.logger.info(f'Asserting counts match between recorded count in db and extracted csv')
self.logger.info(f'{self.row_count} == {num_rows_in_csv}')
assert self.row_count == num_rows_in_csv
self.logger.info(f'{num_rows_in_csv} == {self.row_count}')
assert self.row_count == num_rows_in_csv, f'Row counts dont match!! extracted csv: {num_rows_in_csv}, oracle table: {self.row_count}'

self.logger.info(f'Checking row count again and comparing against csv count, this can catch large datasets that are actively updating..')

Expand All @@ -260,7 +258,7 @@ def extract(self):
cursor.execute(stmt)
recent_row_count = cursor.fetchone()[0]
self.logger.info(f'{recent_row_count} == {num_rows_in_csv}')
assert recent_row_count == num_rows_in_csv
assert recent_row_count == num_rows_in_csv, f'Row counts dont match!! recent row count: {recent_row_count}, csv : {self.num_rows_in_csv}'

self.load_csv_to_s3()
os.remove(self.csv_path)
Expand All @@ -273,8 +271,7 @@ def append(self):
print('loading CSV into geopetl..')
rows = etl.fromcsv(self.csv_path)
num_rows_in_csv = rows.nrows()
if num_rows_in_csv == 0:
raise AssertionError('Error! Dataset is empty? Line count of CSV is 0.')
assert num_rows_in_csv != 0, 'Error! Dataset is empty? Line count of CSV is 0.'
print(f'Rows: {num_rows_in_csv}')
interval = int(num_rows_in_csv / 10)

Expand All @@ -287,8 +284,7 @@ def load(self):
print('loading CSV into geopetl..')
rows = etl.fromcsv(self.csv_path)
num_rows_in_csv = rows.nrows()
if num_rows_in_csv == 0:
raise AssertionError('Error! Dataset is empty? Line count of CSV is 0.')
assert num_rows_in_csv != 0, 'Error! Dataset is empty? Line count of CSV is 0.'
print(f'Rows: {num_rows_in_csv}')
# Interval to print progress
interval = int(num_rows_in_csv / 10)
Expand All @@ -303,6 +299,7 @@ def load(self):
'''
cursor.execute(cols_stmt)
cols = cursor.fetchall()[0][0]
assert cols, f'Could not fetch columns, does the table exist?\n Statement: {cols_stmt}'
# Detect if registered through existence of objectid column
sde_registered = False
if 'OBJECTID_' in cols:
Expand Down Expand Up @@ -369,7 +366,7 @@ def load(self):
cursor.execute(f'SELECT COUNT(*) FROM {self.table_schema.upper()}.{self.table_name.upper()}')
oracle_rows = cursor.fetchone()[0]
print(f'assert {num_rows_in_csv} == {oracle_rows}')
assert num_rows_in_csv == oracle_rows
assert num_rows_in_csv == oracle_rows, f'Row counts dont match!! csv: {num_rows_in_csv}, oracle table: {oracle_rows}'
print('Done.')
except (Exception, KeyboardInterrupt) as e:
cursor.execute('ROLLBACK')
Expand Down

0 comments on commit 04b09ce

Please sign in to comment.