style: Constant case path string variables

chuangcaleb · Apr 27, 2022 · b348761 · b348761
1 parent f5dea09
commit b348761
Show file tree

Hide file tree

Showing 9 changed files with 36 additions and 41 deletions.
diff --git a/building_model/mtr_utils/import_dataset.py b/building_model/mtr_utils/import_dataset.py
@@ -1,16 +1,16 @@
 import pandas as pd
 from mtr_utils import config as cfg
 
-song_theme_feature_database_path = 'data/features/song_theme_feature_database.csv'
-song_theme_label_database_path = 'data/labels/song_theme_label_database.xlsx'
+FEATURE_DB_PATH = 'data/features/song_theme_feature_database.csv'
+LABEL_DB_PATH = 'data/labels/song_theme_label_database.xlsx'
 
 try:
 
     # Access song_theme_feature_database
-    raw_feature_df = pd.read_csv(song_theme_feature_database_path)
+    raw_feature_df = pd.read_csv(FEATURE_DB_PATH)
 
     # Access song_theme_labels_database
-    raw_label_df = pd.read_excel(song_theme_label_database_path)
+    raw_label_df = pd.read_excel(LABEL_DB_PATH)
 
     # Extract recognizable data from label dataset
     recognz_label_df = raw_label_df[raw_label_df.recognizable == 1]

diff --git a/calculating_dataset/clean_db.py b/calculating_dataset/clean_db.py
@@ -3,8 +3,8 @@
 import re
 
 # Access song_theme_database db
-song_theme_feature_database_path = 'data/features/song_theme_feature_database.csv'
-features_df = pd.read_csv(song_theme_feature_database_path, na_values=[' NaN'])
+FEATURE_DB_PATH = 'data/features/song_theme_feature_database.csv'
+features_df = pd.read_csv(FEATURE_DB_PATH, na_values=[' NaN'])
 
 # Clean ids
 features_df.rename(columns={'Unnamed: 0': 'id'}, inplace=True)
@@ -14,4 +14,4 @@
 # Replace NaN with 0
 features_df.fillna(0, inplace=True)
 
-features_df.to_csv(song_theme_feature_database_path, index=False)
+features_df.to_csv(FEATURE_DB_PATH, index=False)
diff --git a/calculating_dataset/generate_jsymbolic_config.py b/calculating_dataset/generate_jsymbolic_config.py
@@ -13,7 +13,7 @@ def config_write(string):
 # Input midi bin's root dir
 BIN_ROOT_DIR = DATA_ROOT_DIR + 'bin/'
 
-LABEL_DATABASE_PATH = DATA_ROOT_DIR + 'labels/song_theme_label_database.xlsx'
+LABEL_DB_PATH = DATA_ROOT_DIR + 'labels/song_theme_label_database.xlsx'
 
 # Output path
 FEAT_ROOT_DIR = DATA_ROOT_DIR + 'features/'
@@ -25,7 +25,7 @@ def config_write(string):
 # * Import Data ----------------------------------------------------------------
 
 # Access song_theme_label_database db
-label_df = pd.read_excel(LABEL_DATABASE_PATH)
+label_df = pd.read_excel(LABEL_DB_PATH)
 
 # Access our custom config file
 config_file = open(CONFIG_PATH, 'wb')

diff --git a/collecting_data/1_scraping_midi/scrape_bitmidi.py b/collecting_data/1_scraping_midi/scrape_bitmidi.py
@@ -7,9 +7,9 @@
 source = 'bitmidi'
 domain = "http://www." + source + ".com"
 
-download_path = 'data/bin/' + source
-if not os.path.exists(download_path):
-    os.makedirs(download_path)
+OUTPUT_DIR = 'data/bin/' + source
+if not os.path.exists(OUTPUT_DIR):
+    os.makedirs(OUTPUT_DIR)
 
 terminate = 0
 page_number = 0
@@ -63,7 +63,7 @@
                     "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
                 })
                 filename = download_header + "_" + source + ".mid"
-                with open(download_path + '/' + filename, 'wb') as saveMidFile:
+                with open(OUTPUT_DIR + '/' + filename, 'wb') as saveMidFile:
                     saveMidFile.write(mid_file.content)
                     print('Downloaded {} successfully.\n'.format(download_header))
 

diff --git a/collecting_data/1_scraping_midi/scrape_freemidi.py b/collecting_data/1_scraping_midi/scrape_freemidi.py
@@ -7,7 +7,7 @@
 source = 'freemidi'
 domain = "http://www." + source + ".com"
 
-path = os.path.realpath(__file__)
+CURRENT_PATH = os.path.realpath(__file__)
 
 print(f"\n\nScraping from {domain}")
 print("Type Y to scrape; anything else to skip\n")

diff --git a/collecting_data/1_scraping_midi/scrape_midiworld.py b/collecting_data/1_scraping_midi/scrape_midiworld.py
@@ -7,10 +7,9 @@
 domain = "http://www.midiworld.com"
 category = 'movie%20themes'  # CHANGE THIS
 
-download_path = 'data/bin/' + \
-    source + "/" + re.sub(r'%20', "-", category)
-if not os.path.exists(download_path):
-    os.makedirs(download_path)
+OUTPUT_DIR = 'data/bin/' + source + "/" + re.sub(r'%20', "-", category)
+if not os.path.exists(OUTPUT_DIR):
+    os.makedirs(OUTPUT_DIR)
 
 
 # Extract metadata from the download label
@@ -35,7 +34,7 @@ def downloadFile(anchor, filename):
 
     link = anchor['href']
     mid_file = requests.get(link, stream=True)
-    with open(download_path + '/' + filename, 'wb') as saveMidFile:
+    with open(OUTPUT_DIR + '/' + filename, 'wb') as saveMidFile:
         saveMidFile.write(mid_file.content)
         print('Downloaded \"{}\" successfully.'.format(filename))
 

diff --git a/collecting_data/2_building_dataset/create_db.py b/collecting_data/2_building_dataset/create_db.py
@@ -5,17 +5,16 @@
 Compares key index(es), compile & sort unique set, then overwrite?
 """
 
-root_path = 'data/bin'
-song_theme_database_path = 'data/labels/song_theme_label_database.xlsx'
-label_df = pd.DataFrame()
+BIN_DIR = 'data/bin'
+LABEL_DB_PATH = 'data/labels/song_theme_label_database.xlsx'
 
 # Get list of directories/sources
-directory_names = os.listdir(root_path)
+directory_names = os.listdir(BIN_DIR)
 # Get list of subfiles
-directories_data = [x for x in os.walk(root_path) if x[0] != root_path]
+directories_data = [x for x in os.walk(BIN_DIR) if x[0] != BIN_DIR]
 
 
-if not os.path.exists(song_theme_database_path):
+if not os.path.exists(LABEL_DB_PATH):
 
     # For each source/directory
     for i, directory_data in enumerate(directories_data):
@@ -38,7 +37,7 @@
     print(label_df)
 
     # Write to output csv file
-    label_df.to_excel(song_theme_database_path, index=False)
+    label_df.to_excel(LABEL_DB_PATH, index=False)
 
 else:
 

diff --git a/collecting_data/2_building_dataset/db_stats.py b/collecting_data/2_building_dataset/db_stats.py
@@ -6,15 +6,15 @@
 
 from process_db import *
 
-label_root_dir = 'data/labels/'
-song_theme_label_database_path = label_root_dir + \
-    'song_theme_label_database.xlsx'
+LABELS_DIR = 'data/labels/'
+LABEL_DB_PATH = LABELS_DIR + 'song_theme_label_database.xlsx'
+STATS_EXPORT_PATH = LABELS_DIR + 'label_stats_summary.json'
 
 # Convert all p's to 1's
-p_to_1_convert(song_theme_label_database_path)
+p_to_1_convert(LABEL_DB_PATH)
 
 # Import data
-label_df = pd.read_excel(song_theme_label_database_path)
+label_df = pd.read_excel(LABEL_DB_PATH)
 
 # * Aux methods
 
@@ -99,13 +99,11 @@ def percentage(positive, total):
 )
 
 plt.tight_layout()
-plt.savefig(label_root_dir + 'label_freq.png')
+plt.savefig(LABELS_DIR + 'label_freq.png')
 
 # * EXPORT
 
 sorted_label_stats_dict = sorted_label_stats_df.to_dict()
-# with open(label_summary_export_path, 'w') as f:
-#     f.write(sorted_label_stats_json)
 
 stats_dict = {
     'total_count': total_count,
@@ -117,8 +115,6 @@ def percentage(positive, total):
     'recog_procs_perc': perc_recog_procs,
 } | sorted_label_stats_dict
 
-label_summary_export_path = label_root_dir + 'label_stats_summary.json'
+json.dump(stats_dict, open(STATS_EXPORT_PATH, "w"))
 
-json.dump(stats_dict, open(label_summary_export_path, "w"))
-
-print('Saved label statistics to ' + label_summary_export_path)
+print('Saved label statistics to ' + STATS_EXPORT_PATH)
diff --git a/collecting_data/2_building_dataset/process_db.py b/collecting_data/2_building_dataset/process_db.py
@@ -1,9 +1,10 @@
 import pandas as pd
 
 
-def p_to_1_convert(song_theme_label_database_path):
+def p_to_1_convert(label_db_path):
+    """Rewrite the Excel label file, replacing every 'p' with 1."""
 
-    label_df = pd.read_excel(song_theme_label_database_path)
+    label_df = pd.read_excel(label_db_path)
 
     # Replace all 'p' labels with '1'
     label_df.replace('p', 1, inplace=True)
@@ -12,5 +13,5 @@ def p_to_1_convert(song_theme_label_database_path):
     # main_df.iloc[:, 2:28] = main_df.iloc[:, 2:28].astype("Int64")
 
     # Write back to excel
-    label_df.to_excel(song_theme_label_database_path, index=False,
+    label_df.to_excel(label_db_path, index=False,
                       header=True, freeze_panes=(1, 1))