rename option for knack extract

CityOfPhiladelphia · Nov 26, 2024 · e37778d · e37778d
1 parent fed2305
commit e37778d
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 1 deletion.
diff --git a/databridge_etl_tools/knack/knack.py b/databridge_etl_tools/knack/knack.py
@@ -20,12 +20,14 @@ def __init__(self,
                  api_key, 
                  s3_bucket, 
                  s3_key,
+                 rename_fields,
                  **kwargs):
         self.knack_objectid = knack_objectid
         self.app_id = app_id
         self.api_key = api_key
         self.s3_bucket = s3_bucket
         self.s3_key = s3_key
+        self.rename_fields = rename_fields
         self.csv_path = '/tmp/output.csv'
 
     def get_type(self, knack_type):
@@ -153,7 +155,26 @@ def convert_to_csv_row(self, schema, record):
 
         return out
 
+    def rename_csv_fields(self):
+        """Rewrite the CSV at self.csv_path in place, renaming header columns according to self.rename_fields."""
+        rename_dict = dict(field.split(':') for field in self.rename_fields.split(','))  # "old:new,old2:new2" -> {'old': 'new', ...}; names are not stripped; ValueError if an item lacks exactly one ':'
 
+        # Load the entire file (header plus every data row) into memory
+        with open(self.csv_path, 'r', newline='', encoding='utf-8') as infile:
+            reader = csv.reader(infile)
+            rows = list(reader)
+
+        # The first row is treated as the header; rows[0] raises IndexError if the file has no rows
+        header = rows[0]
+        print("Original header:", header)
+        updated_header = [rename_dict.get(field, field) for field in header]  # names missing from rename_dict pass through unchanged
+        print("Updated header:", updated_header)
+
+        # Overwrite the same path: renamed header first, then the data rows as parsed
+        with open(self.csv_path, 'w', newline='', encoding='utf-8') as outfile:
+            writer = csv.writer(outfile)
+            writer.writerow(updated_header)  # renamed header row
+            writer.writerows(rows[1:])  # every row after the header, values unchanged
 
     def load_to_s3(self):
         s3 = boto3.resource('s3')
@@ -179,6 +200,9 @@ def extract(self):
                 for record in records_batch:
                     out_record = self.convert_to_csv_row(schema, record)
                     writer.writerow(out_record)
+
+        if self.rename_fields:
+            self.rename_csv_fields()
 
         num_lines = sum(1 for _ in open(self.csv_path)) - 1
         assert num_lines > 0, 'CSV file contains 0 lines??'

diff --git a/databridge_etl_tools/knack/knack_commands.py b/databridge_etl_tools/knack/knack_commands.py
@@ -10,6 +10,7 @@
 @click.option('--knack_objectid', required=True, help='Not an objectid in the ESRI sense, refers to a table under an "app" in Knack')
 @click.option('--s3_bucket', required=True, help='Bucket to place the extracted csv in.')
 @click.option('--s3_key', required=True, help='key under the bucket, example: "staging/dept/table_name.csv')
+@click.option('--rename_fields', required=False, default=None, help='Fields to rename, example: "old_field_name:new_field_name,old_field_name2:new_field_name2"')
 @click.option('--indent', type=int, default=None, help='???')
 def knack(ctx, **kwargs):
     ctx.obj = Knack(**kwargs)

diff --git a/pyproject.toml b/pyproject.toml
@@ -9,7 +9,7 @@ py_modules = ['databridge_etl_tools']
 
 [project]
 name = "databridge-etl-tools"
-version = "1.3.7"
+version = "1.3.8"
 description = "Command line tools to extract and load SQL data to various endpoints"
 authors = [
     {name = "citygeo", email = "[email protected]"},