Merge branch 'master' into annotations/outdated_missing

LAAC-LSCP · Nov 8, 2024 · c3d836b · c3d836b
2 parents 3e72050 + 7aa8a1b
commit c3d836b
Show file tree

Hide file tree

Showing 3 changed files with 51 additions and 54 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,6 +13,7 @@ All notable changes to this project will be documented in this file.
 ### Fixed
 
 - does not fail anymore when annotations are missing their merged_from set
+- audio conversions with the basic and standard processors now always include the converted filename and edit the file properly
 
 ## [0.2.2] 2024-06-26
 

diff --git a/ChildProject/pipelines/processors.py b/ChildProject/pipelines/processors.py
@@ -165,12 +165,11 @@ def process_recording(self, recording):
         original_file = self.project.get_recording_path(
             recording["recording_filename"], self.input_profile
         )
+        converted_filename = os.path.splitext(recording["recording_filename"])[0] + "." + self.format
 
         destination_file = os.path.join(
             self.output_directory(),
-            os.path.splitext(recording["recording_filename"])[0]
-            + "."
-            + self.format,
+            converted_filename,
         )
 
         os.makedirs(name=os.path.dirname(destination_file), exist_ok=True)
@@ -201,36 +200,23 @@ def process_recording(self, recording):
         (stdout, stderr) = proc.communicate()
         success = proc.returncode == 0
 
-        if not success:
-            logger_annotations.error(stderr)
-            return pd.DataFrame(
-                [
-                    {
-                        "original_filename": recording["recording_filename"],
-                        "converted_filename": "",
-                        "success": False,
-                        "error": stderr,
-                    }
-                ]
-            )
-        else:
-            converted_files = [
-                os.path.splitext(recording["recording_filename"])[0]
-                + "."
-                + self.format
-            ]
-
-        return pd.DataFrame(
+        df = pd.DataFrame(
             [
                 {
                     "original_filename": recording["recording_filename"],
-                    "converted_filename": cf,
-                    "success": True,
+                    "converted_filename": converted_filename,
                 }
-                for cf in converted_files
             ]
         )
 
+        if not success:
+            logger_annotations.error(stderr)
+            df = df.assign(success=False, error=stderr)
+            return df
+        else:
+            df = df.assign(success=True)
+            return df
+
     @staticmethod
     def add_parser(subparsers, subcommand):
         parser = subparsers.add_parser(subcommand, help="basic audio conversion")
@@ -473,12 +459,11 @@ def process_recording(self, recording):
         original_file = self.project.get_recording_path(
             recording["recording_filename"], self.input_profile
         )
+        converted_filename = os.path.splitext(recording["recording_filename"])[0] + "." + self.format
 
         destination_file = os.path.join(
             self.output_directory(),
-            os.path.splitext(recording["recording_filename"])[0]
-            + "."
-            + self.format,
+            converted_filename,
         )
 
         os.makedirs(name=os.path.dirname(destination_file), exist_ok=True)
@@ -511,37 +496,24 @@ def process_recording(self, recording):
         (stdout, stderr) = proc.communicate()
         success = proc.returncode == 0
 
-        if not success:
-            print(stderr, file=sys.stderr)
-
-            return pd.DataFrame(
-                [
-                    {
-                        "original_filename": recording["recording_filename"],
-                        "converted_filename": "",
-                        "success": False,
-                        "error": stderr,
-                    }
-                ]
-            )
-        else:
-            converted_files = [
-                os.path.splitext(recording["recording_filename"])[0]
-                + "."
-                + self.format
-            ]
-
-        return pd.DataFrame(
+        df = pd.DataFrame(
             [
                 {
                     "original_filename": recording["recording_filename"],
-                    "converted_filename": cf,
-                    "success": True,
+                    "converted_filename": converted_filename,
                 }
-                for cf in converted_files
             ]
         )
 
+        if not success:
+            logger_annotations.error(stderr)
+            df = df.assign(success='False', error=stderr)
+            return df
+        else:
+            df = df.assign(success='True')
+            return df
+
+
     @staticmethod
     def add_parser(subparsers, subcommand):
         parser = subparsers.add_parser(subcommand, help="standard audio conversion")
@@ -610,7 +582,7 @@ def run(
         )
 
         logger_annotations.info(
-            "exported processor parameters to ",
+            "exported processor parameters to %s",
             parameters_path,
             )   
 

diff --git a/docs/source/processors.rst b/docs/source/processors.rst
@@ -42,6 +42,30 @@ Values provided to this option should be existing ``recording_filename`` values
 
 The ``--skip-existing`` switch can be used to skip previously processed files.
 
+Standard audio conversion
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Same as the basic processor but using standard parameters for the conversion:
+
+- single-channel (first channel is kept)
+- 16 kHz sampling rate
+- codec ``pcm_s16le``
+- wav format
+
+Audio files are exported to ``recordings/converted/standard``.
+
+.. clidoc::
+
+   child-project process /path/to/dataset standard --help
+
+Example:
+
+::
+
+   child-project process . standard
+
+Values provided to the option ``--recordings`` should be existing ``recording_filename`` values in ``metadata/recordings.csv``.
+
+The ``--skip-existing`` switch can be used to skip previously processed files.
+
 Multi-core audio conversion with slurm on a cluster
 ===================================================