Skip to content

Commit

Permalink
The code meets the following requirement:
Browse files Browse the repository at this point in the history
    For whitespace: LABEL, definition, definition source, alternative label
Users can pass the column labels that need to be processed and the file paths as arguments to the function clean_up_fn.
One example has been uploaded.
  • Loading branch information
yupingzheng00000 committed Aug 9, 2024
1 parent 5e89fff commit 8defefd
Show file tree
Hide file tree
Showing 2 changed files with 1,114 additions and 0 deletions.
33 changes: 33 additions & 0 deletions docs/VO-cleanup/clean_up_definition_field.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pandas as pd
import os
from clean_up_data import clean_up_article

def clean_up(cell: object) -> object:
    """Return *cell* with surrounding whitespace removed and first letter capitalized.

    Only ``str`` values are changed; any other value (numbers, ``None``,
    NaN, ...) is returned as-is, so the function can be applied to every
    cell of a column regardless of its type.

    NOTE: ``str.capitalize`` also lower-cases every character after the
    first, so ``"DNA vaccine"`` becomes ``"Dna vaccine"``.
    """
    if isinstance(cell, str):
        cell = cell.strip()
        cell = cell.capitalize()
    return cell


def clean_up_fn(columns_to_clean, file_paths):
    """Clean the given columns of each CSV file and save a processed copy.

    Each file in *file_paths* is read with pandas, ``clean_up_article``
    (imported from ``clean_up_data``) is applied to every cell of each
    column in *columns_to_clean* that exists in that file, and the result
    is written next to the input file as ``<name>_processed<ext>``.
    Columns that are missing from a file are skipped silently.

    Args:
        columns_to_clean: Names of the columns to process.
        file_paths: Paths of the CSV files to process.
    """
    for file_path in file_paths:
        df = pd.read_csv(file_path)

        # Apply clean_up_article to each requested column that is present.
        for column in columns_to_clean:
            if column in df.columns:
                df[column] = df[column].apply(clean_up_article)

        # Build the output path: same directory, "_processed" before the extension.
        dir_name, base_name = os.path.split(file_path)
        name, ext = os.path.splitext(base_name)
        output_file = os.path.join(dir_name, f"{name}_processed{ext}")

        # Save without the DataFrame index so the column layout matches the input.
        df.to_csv(output_file, index=False)
        print(f"Modified data saved to {output_file}")


# Example usage. Guarded so that importing this module does not try to
# process the hard-coded, machine-specific paths below.
if __name__ == "__main__":
    clean_up_fn(
        ['definition', 'LABEL'],
        [
            "C:\\Users\\00000\\VO\\docs\\VO-cleanup\\term_editor\\processed_file2.csv",
            'C:\\Users\\00000\\VO\\src\\templates\\vaccine_adjuvant.csv',
        ],
    )
Loading

0 comments on commit 8defefd

Please sign in to comment.