From 6da60d0959e33ee5da86af0c1bf60f727de7f9a4 Mon Sep 17 00:00:00 2001 From: LPeurey Date: Thu, 7 Nov 2024 14:13:04 +0100 Subject: [PATCH 1/3] enable subsequent conversions in basic and standard to edit metadata and not double entries --- ChildProject/pipelines/processors.py | 80 +++++++++------------------- 1 file changed, 26 insertions(+), 54 deletions(-) diff --git a/ChildProject/pipelines/processors.py b/ChildProject/pipelines/processors.py index 87042aa5..7bf5f90c 100644 --- a/ChildProject/pipelines/processors.py +++ b/ChildProject/pipelines/processors.py @@ -165,12 +165,11 @@ def process_recording(self, recording): original_file = self.project.get_recording_path( recording["recording_filename"], self.input_profile ) + converted_filename = os.path.splitext(recording["recording_filename"])[0] + "." + self.format destination_file = os.path.join( self.output_directory(), - os.path.splitext(recording["recording_filename"])[0] - + "." - + self.format, + converted_filename, ) os.makedirs(name=os.path.dirname(destination_file), exist_ok=True) @@ -201,36 +200,23 @@ def process_recording(self, recording): (stdout, stderr) = proc.communicate() success = proc.returncode == 0 - if not success: - logger_annotations.error(stderr) - return pd.DataFrame( - [ - { - "original_filename": recording["recording_filename"], - "converted_filename": "", - "success": False, - "error": stderr, - } - ] - ) - else: - converted_files = [ - os.path.splitext(recording["recording_filename"])[0] - + "." 
- + self.format - ] - - return pd.DataFrame( + df = pd.DataFrame( [ { "original_filename": recording["recording_filename"], - "converted_filename": cf, - "success": True, + "converted_filename": converted_filename, } - for cf in converted_files ] ) + if not success: + logger_annotations.error(stderr) + df = df.assign(success=False, error=stderr) + return df + else: + df = df.assign(success=True) + return df + @staticmethod def add_parser(subparsers, subcommand): parser = subparsers.add_parser(subcommand, help="basic audio conversion") @@ -473,12 +459,11 @@ def process_recording(self, recording): original_file = self.project.get_recording_path( recording["recording_filename"], self.input_profile ) + converted_filename = os.path.splitext(recording["recording_filename"])[0] + "." + self.format destination_file = os.path.join( self.output_directory(), - os.path.splitext(recording["recording_filename"])[0] - + "." - + self.format, + converted_filename, ) os.makedirs(name=os.path.dirname(destination_file), exist_ok=True) @@ -511,37 +496,24 @@ def process_recording(self, recording): (stdout, stderr) = proc.communicate() success = proc.returncode == 0 - if not success: - print(stderr, file=sys.stderr) - - return pd.DataFrame( - [ - { - "original_filename": recording["recording_filename"], - "converted_filename": "", - "success": False, - "error": stderr, - } - ] - ) - else: - converted_files = [ - os.path.splitext(recording["recording_filename"])[0] - + "." 
- + self.format - ] - - return pd.DataFrame( + df = pd.DataFrame( [ { "original_filename": recording["recording_filename"], - "converted_filename": cf, - "success": True, + "converted_filename": converted_filename, } - for cf in converted_files ] ) + if not success: + logger_annotations.error(stderr) + df = df.assign(success='False', error=stderr) + return df + else: + df = df.assign(success='True') + return df + + @staticmethod def add_parser(subparsers, subcommand): parser = subparsers.add_parser(subcommand, help="standard audio conversion") @@ -610,7 +582,7 @@ def run( ) logger_annotations.info( - "exported processor parameters to ", + "exported processor parameters to %s", parameters_path, ) From 59b5cfa5ed2cdabe76c12d8d8074fcad7dd07852 Mon Sep 17 00:00:00 2001 From: LPeurey Date: Thu, 7 Nov 2024 14:14:15 +0100 Subject: [PATCH 2/3] CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5320ee34..ea22344b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ All notable changes to this project will be documented in this file. - docs and tests for init command - docs and tests for automated-import command +### Fixed + +- audio conversions in basic and standard now always include conversion filename and edit the file properly + ## [0.2.2] 2024-06-26 ### Added From e41381838ac39a0b5718d6a263dc3421753f2a6b Mon Sep 17 00:00:00 2001 From: LPeurey Date: Thu, 7 Nov 2024 14:29:08 +0100 Subject: [PATCH 3/3] doc for standard audio processing --- docs/source/processors.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/source/processors.rst b/docs/source/processors.rst index f594bcd0..976a5be4 100644 --- a/docs/source/processors.rst +++ b/docs/source/processors.rst @@ -42,6 +42,30 @@ Values provided to this option should be existing ``recording_filename`` values The ``--skip-existing`` switch can be used to skip previously processed files. 
+Standard audio conversion +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Same as the basic processor but using standard parameters for the conversion: +- single-channel (first channel is kept) +- 16 kHz sampling rate +- codec pcm_s16le +- wav format +Audio files are exported to ``recordings/converted/standard``. + +.. clidoc:: + + child-project process /path/to/dataset standard --help + +Example: + +:: + + child-project process . standard + +Values provided to the option ``--recordings`` should be existing ``recording_filename`` values in ``metadata/recordings.csv``. + +The ``--skip-existing`` switch can be used to skip previously processed files. + Multi-core audio conversion with slurm on a cluster ===================================================