LAAC-LSCP · lucasgautheron · Mar 4, 2021 · Jul 25, 2022
diff --git a/ChildProject/annotations.py b/ChildProject/annotations.py
@@ -1477,10 +1477,70 @@ def clip_segments(segments: pd.DataFrame, start: int, stop: int) -> pd.DataFrame
 
         start = int(start)
         stop = int(stop)
-
+        
         segments["segment_onset"].clip(lower=start, upper=stop, inplace=True)
         segments["segment_offset"].clip(lower=start, upper=stop, inplace=True)
 
         segments = segments[segments["segment_offset"] > segments["segment_onset"]]
 
         return segments
+
+    def get_vc_stats(self, segments: pd.DataFrame, turntakingthresh: float = 1):
+        """Compute per-speaker vocalization statistics from a table of segments.
+
+        ``SPEECH`` segments are dropped and the rest is sorted by onset. A turn is a segment starting
+        less than ``turntakingthresh`` after the previous one ended, in a ``CHI`` <-> ``FEM``/``MAL``/``OCH``
+        pair; ``cds_dur`` sums the durations of segments in such an exchange with a neighbouring segment.
+
+        :param segments: dataframe with ``segment_onset``, ``segment_offset`` and ``speaker_type`` columns
+        :param turntakingthresh: maximum gap between two segments of a turn, in the unit of the timestamps
+        :return: dataframe indexed by ``speaker_type``, with columns ``cum_dur``, ``voc_count``, ``turns``, ``cds_dur``
+        """
+        key_child_env = ['FEM', 'MAL', 'OCH']
+        # explicit copy: avoids SettingWithCopyWarning and leaves the caller's dataframe untouched
+        segments = segments.sort_values(['segment_onset', 'segment_offset'])
+        segments = segments[segments['speaker_type'] != 'SPEECH'].copy()
+        segments['duration'] = segments['segment_offset'] - segments['segment_onset']
+        # vectorized masks instead of row-wise apply(): same result, and well-defined on empty input
+        speaker = segments['speaker_type']
+        prev_speaker, next_speaker = speaker.shift(1), speaker.shift(-1)
+        iti = segments['segment_onset'] - segments['segment_offset'].shift(1)
+        post_iti = segments['segment_onset'].shift(-1) - segments['segment_offset']
+        is_chi, is_env = speaker == 'CHI', speaker.isin(key_child_env)
+        with_prev = (is_chi & prev_speaker.isin(key_child_env)) | (is_env & (prev_speaker == 'CHI'))
+        with_next = (is_chi & next_speaker.isin(key_child_env)) | (is_env & (next_speaker == 'CHI'))
+        segments['turn'] = (iti < turntakingthresh) & with_prev
+        segments['cds'] = segments['duration'].where(segments['turn'] | ((post_iti < turntakingthresh) & with_next), 0)
+
+        return segments.groupby('speaker_type').agg(
+            cum_dur = ('duration', 'sum'),
+            voc_count = ('duration', 'count'),
+            turns = ('turn', 'sum'),
+            cds_dur = ('cds', 'sum')
+        )
+
+    def plot(self, annotations):
+        """Draw the annotated ranges of each recording, one subplot row per recording.
+
+        :param annotations: dataframe with ``recording_filename``, ``set``, ``range_onset``, ``range_offset``, ``time_seek`` columns
+        :return: the matplotlib figure and the axes, exactly as returned by ``pyplot.subplots``
+        """
+        from matplotlib import pyplot as plt
+        from pyannote.core import Annotation, Segment, notebook
+
+        nrows = len(annotations['recording_filename'].unique())
+        fig, ax = plt.subplots(nrows = nrows, ncols = 1)
+        fig.set_figwidth(20)
+        fig.set_figheight(nrows * 2)
+        # subplots() returns a bare Axes rather than an array when there is a single row
+        axes = ax if nrows > 1 else [ax]
+
+        for row_ax, (_, recording_annotations) in zip(axes, annotations.groupby('recording_filename')):
+            pyannotation = Annotation()
+            for annotation in recording_annotations.to_dict(orient = 'records'):
+                start = annotation['range_onset'] + annotation['time_seek']
+                end = annotation['range_offset'] + annotation['time_seek']
+                pyannotation[Segment(start, end), annotation['set']] = annotation['set']
+            notebook.plot_annotation(pyannotation, ax = row_ax, legend = True, time = True)
+
+        return fig, ax
diff --git a/ChildProject/cmdline.py b/ChildProject/cmdline.py
@@ -314,12 +314,61 @@ def rename_annotations(args):
 
     am = AnnotationManager(project)
     am.read()
+
     am.rename_set(
         args.set,
         args.new_set,
         recursive=args.recursive,
         ignore_errors=args.ignore_errors,
-    )
+        )
+
+@subcommand([
+    arg("source", help = "project path"),
+    arg("--recordings", help = 'list of recordings to plot', nargs = '*', default = [])
+])
+def plot_annotations(args):
+    """show a diagram representing available annotations for each recording"""
+    from matplotlib import pyplot as plt
+
+    project = ChildProject(args.source)
+    errors, warnings = project.validate(ignore_files = True)
+    if len(errors) > 0:
+        print("validation failed, {} error(s) occured".format(len(errors)), file = sys.stderr)
+        sys.exit(1)
+
+    am = AnnotationManager(project)
+    am.read()
+    annotations = am.annotations
+
+    # an empty --recordings list (the default) keeps every recording
+    if args.recordings:
+        annotations = annotations[annotations['recording_filename'].isin(args.recordings)]
+
+    am.plot(annotations)
+    plt.show()
+
+@subcommand([
+    arg("dataset", help = "dataset to install. Should be a valid repository name at https://github.com/LAAC-LSCP. (e.g.: solomon-data)"),
+    arg("--destination", help = "destination path", required = False, default = ""),
+    arg("--storage-hostname", dest = "storage_hostname", help = "ssh storage hostname (e.g. 'foberon')", required = False, default = "")
+])
+def import_data(args):
+    """import and configure a datalad dataset"""
+
+    import datalad.api
+    import datalad.distribution.dataset
+
+    # default destination: base name of the dataset, without its extension
+    destination = args.destination or os.path.splitext(os.path.basename(args.dataset))[0]
+
+    datalad.api.install(source = args.dataset, path = destination)
+
+    # NOTE(review): `ds` and --storage-hostname are not used by the visible code; confirm the intended configuration step
+    ds = datalad.distribution.dataset.require_dataset(
+        destination,
+        check_installed = True,
+        purpose = 'configuration'
+    )
 
 
 @subcommand([arg("source", help="source data path")])