Skip to content

Commit

Permalink
rename option for knack extract
Browse files Browse the repository at this point in the history
  • Loading branch information
floptical committed Nov 26, 2024
1 parent fed2305 commit e37778d
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
24 changes: 24 additions & 0 deletions databridge_etl_tools/knack/knack.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ def __init__(self,
api_key,
s3_bucket,
s3_key,
rename_fields,
**kwargs):
self.knack_objectid = knack_objectid
self.app_id = app_id
self.api_key = api_key
self.s3_bucket = s3_bucket
self.s3_key = s3_key
self.rename_fields = rename_fields
self.csv_path = '/tmp/output.csv'

def get_type(self, knack_type):
Expand Down Expand Up @@ -153,7 +155,26 @@ def convert_to_csv_row(self, schema, record):

return out

def rename_csv_fields(self):
    """Rename header columns of the CSV at ``self.csv_path`` in place.

    ``self.rename_fields`` is a comma-separated list of ``old:new`` pairs,
    for example ``"old_field_name:new_field_name,old2:new2"``. Whitespace
    around names and empty entries (such as a trailing comma) are ignored.
    Header columns that are not named in the mapping are left unchanged, and
    data rows are written back as read.

    Raises:
        ValueError: if an entry is not exactly one non-empty ``old:new`` pair.
    """
    # Parse the spec explicitly so that a malformed entry produces a clear
    # message instead of the opaque error raised by dict() on bad sequences.
    rename_dict = {}
    for entry in self.rename_fields.split(','):
        entry = entry.strip()
        if not entry:
            # Tolerate stray separators, e.g. "a:b," or "a:b,,c:d".
            continue
        parts = [part.strip() for part in entry.split(':')]
        if len(parts) != 2 or not all(parts):
            raise ValueError(
                f"Invalid rename_fields entry {entry!r}; "
                "expected 'old_field_name:new_field_name'"
            )
        old_name, new_name = parts
        rename_dict[old_name] = new_name

    # Read the CSV file
    with open(self.csv_path, 'r', newline='', encoding='utf-8') as infile:
        rows = list(csv.reader(infile))

    if not rows:
        # An empty file has no header to rename; leave it untouched rather
        # than failing with an IndexError on rows[0].
        return

    # Get the header and rename fields
    header = rows[0]
    print("Original header:", header)
    updated_header = [rename_dict.get(field, field) for field in header]
    print("Updated header:", updated_header)

    # Write the updated CSV back
    with open(self.csv_path, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(updated_header)  # Write the updated header
        writer.writerows(rows[1:])  # Write the remaining data

def load_to_s3(self):
s3 = boto3.resource('s3')
Expand All @@ -179,6 +200,9 @@ def extract(self):
for record in records_batch:
out_record = self.convert_to_csv_row(schema, record)
writer.writerow(out_record)

if self.rename_fields:
self.rename_csv_fields()

num_lines = sum(1 for _ in open(self.csv_path)) - 1
assert num_lines > 0, 'CSV file contains 0 lines??'
Expand Down
1 change: 1 addition & 0 deletions databridge_etl_tools/knack/knack_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
@click.option('--knack_objectid', required=True, help='Not an objectid in the ESRI sense, refers to a table under an "app" in Knack')
@click.option('--s3_bucket', required=True, help='Bucket to place the extracted csv in.')
@click.option('--s3_key', required=True, help='key under the bucket, example: "staging/dept/table_name.csv')
@click.option('--rename_fields', required=False, default=None, help='Fields to rename, example: "old_field_name:new_field_name,old_field_name2:new_field_name2"')
@click.option('--indent', type=int, default=None, help='???')
def knack(ctx, **kwargs):
ctx.obj = Knack(**kwargs)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ py_modules = ['databridge_etl_tools']

[project]
name = "databridge-etl-tools"
version = "1.3.7"
version = "1.3.8"
description = "Command line tools to extract and load SQL data to various endpoints"
authors = [
{name = "citygeo", email = "[email protected]"},
Expand Down

0 comments on commit e37778d

Please sign in to comment.