Skip to content

Commit

Permalink
Merge pull request #1614 from Honei/vox12
Browse files Browse the repository at this point in the history
[vec] Change the vector output to numpy.array
  • Loading branch information
LeoMax-Xiong authored Mar 28, 2022
2 parents f9bf260 + ed7113f commit d60856b
Show file tree
Hide file tree
Showing 16 changed files with 211 additions and 256 deletions.
24 changes: 14 additions & 10 deletions dataset/voxceleb/voxceleb1.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,15 @@

TRIAL_BASE_URL = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/"
TRIAL_LIST = {
"veri_test.txt": "29fc7cc1c5d59f0816dc15d6e8be60f7", # voxceleb1
"veri_test2.txt": "b73110731c9223c1461fe49cb48dddfc", # voxceleb1(cleaned)
"list_test_hard.txt": "21c341b6b2168eea2634df0fb4b8fff1", # voxceleb1-H
"list_test_hard2.txt": "857790e09d579a68eb2e339a090343c8", # voxceleb1-H(cleaned)
"list_test_all.txt": "b9ecf7aa49d4b656aa927a8092844e4a", # voxceleb1-E
"list_test_all2.txt": "a53e059deb562ffcfc092bf5d90d9f3a" # voxceleb1-E(cleaned)
}
"veri_test.txt": "29fc7cc1c5d59f0816dc15d6e8be60f7", # voxceleb1
"veri_test2.txt": "b73110731c9223c1461fe49cb48dddfc", # voxceleb1(cleaned)
"list_test_hard.txt": "21c341b6b2168eea2634df0fb4b8fff1", # voxceleb1-H
"list_test_hard2.txt":
"857790e09d579a68eb2e339a090343c8", # voxceleb1-H(cleaned)
"list_test_all.txt": "b9ecf7aa49d4b656aa927a8092844e4a", # voxceleb1-E
"list_test_all2.txt":
"a53e059deb562ffcfc092bf5d90d9f3a" # voxceleb1-E(cleaned)
}

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
Expand Down Expand Up @@ -176,6 +178,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
# create the manifest file
create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path)


def prepare_trial(base_url, data_list, target_dir):
if not os.path.exists(target_dir):
os.makedirs(target_dir)
Expand All @@ -185,10 +188,12 @@ def prepare_trial(base_url, data_list, target_dir):
if not os.path.exists(os.path.join(target_dir, trial)):
download_url = " --no-check-certificate " + base_url + "/" + trial
download(url=download_url, md5sum=md5sum, target_dir=target_dir)


def main():
if args.target_dir.startswith('~'):
args.target_dir = os.path.expanduser(args.target_dir)

# prepare the vox1 dev data
prepare_dataset(
base_url=BASE_URL,
Expand All @@ -209,8 +214,7 @@ def main():
prepare_trial(
base_url=TRIAL_BASE_URL,
data_list=TRIAL_LIST,
target_dir=os.path.dirname(args.manifest_prefix)
)
target_dir=os.path.dirname(args.manifest_prefix))

print("Manifest prepare done!")

Expand Down
29 changes: 15 additions & 14 deletions dataset/voxceleb/voxceleb2.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,10 @@
import glob
import json
import os
import subprocess
from pathlib import Path

import soundfile

from utils.utility import check_md5sum
from utils.utility import download
from utils.utility import unzip

Expand All @@ -40,9 +38,8 @@
DEV_DATA_URL = BASE_URL + '/vox2_aac.zip'
DEV_MD5SUM = "bbc063c46078a602ca71605645c2a402"


# test data
TEST_DATA_URL = BASE_URL + '/vox2_test_aac.zip'
TEST_DATA_URL = BASE_URL + '/vox2_test_aac.zip'
TEST_MD5SUM = "0d2b3ea430a821c33263b5ea37ede312"

parser = argparse.ArgumentParser(description=__doc__)
Expand All @@ -56,14 +53,16 @@
default="manifest",
type=str,
help="Filepath prefix for output manifests. (default: %(default)s)")
parser.add_argument("--download",
default=False,
action="store_true",
help="Download the voxceleb2 dataset. (default: %(default)s)")
parser.add_argument("--generate",
default=False,
action="store_true",
help="Generate the manifest files. (default: %(default)s)")
parser.add_argument(
"--download",
default=False,
action="store_true",
help="Download the voxceleb2 dataset. (default: %(default)s)")
parser.add_argument(
"--generate",
default=False,
action="store_true",
help="Generate the manifest files. (default: %(default)s)")

args = parser.parse_args()

Expand Down Expand Up @@ -138,7 +137,7 @@ def download_dataset(url, md5sum, target_dir, dataset):
def main():
if args.target_dir.startswith('~'):
args.target_dir = os.path.expanduser(args.target_dir)

# download and unpack the vox2-dev data
print("download: {}".format(args.download))
if args.download:
Expand All @@ -157,7 +156,9 @@ def main():
print("VoxCeleb2 download is done!")

if args.generate:
create_manifest(args.target_dir, manifest_path_prefix=args.manifest_prefix)
create_manifest(
args.target_dir, manifest_path_prefix=args.manifest_prefix)


if __name__ == '__main__':
main()
5 changes: 3 additions & 2 deletions demos/audio_searching/src/operations/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ def get_audios(path):
"""
supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
return [
item for sublist in [[os.path.join(dir, file) for file in files]
for dir, _, files in list(os.walk(path))]
item
for sublist in [[os.path.join(dir, file) for file in files]
for dir, _, files in list(os.walk(path))]
for item in sublist if os.path.splitext(item)[1] in supported_formats
]

Expand Down
178 changes: 79 additions & 99 deletions demos/speaker_verification/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,56 +46,46 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav

Output:

```bash
demo {'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 ,
5.3940268 , -3.04878 , 1.611095 , 10.127234 ,
-10.534177 , -15.821609 , 1.2032688 , -0.35080156,
1.2629458 , -12.643498 , -2.5758228 , -11.343508 ,
2.3385992 , -8.719341 , 14.213509 , 15.404744 ,
-0.39327756, 6.338786 , 2.688887 , 8.7104025 ,
17.469526 , -8.77959 , 7.0576906 , 4.648855 ,
-1.3089896 , -23.294737 , 8.013747 , 13.891729 ,
-9.926753 , 5.655307 , -5.9422326 , -22.842539 ,
0.6293588 , -18.46266 , -10.811862 , 9.8192625 ,
3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 ,
-14.739942 , 1.7594414 , -0.6485091 , 4.485623 ,
2.0207152 , 7.264915 , -6.40137 , 23.63524 ,
2.9711294 , -22.708025 , 9.93719 , 20.354511 ,
-10.324688 , -0.700492 , -8.783211 , -5.27593 ,
15.999649 , 3.3004563 , 12.747926 , 15.429879 ,
4.7849145 , 5.6699696 , -2.3826702 , 10.605882 ,
3.9112158 , 3.1500628 , 15.859915 , -2.1832209 ,
-23.908653 , -6.4799504 , -4.5365124 , -9.224193 ,
14.568347 , -10.568833 , 4.982321 , -4.342062 ,
0.0914714 , 12.645902 , -5.74285 , -3.2141201 ,
-2.7173362 , -6.680575 , 0.4757669 , -5.035051 ,
-6.7964664 , 16.865469 , -11.54324 , 7.681869 ,
0.44475392, 9.708182 , -8.932846 , 0.4123232 ,
-4.361452 , 1.3948607 , 9.511665 , 0.11667654,
2.9079323 , 6.049952 , 9.275183 , -18.078873 ,
6.2983274 , -0.7500531 , -2.725033 , -7.6027865 ,
3.3404543 , 2.990815 , 4.010979 , 11.000591 ,
-2.8873312 , 7.1352735 , -16.79663 , 18.495346 ,
-14.293832 , 7.89578 , 2.2714825 , 22.976387 ,
-4.875734 , -3.0836344 , -2.9999814 , 13.751918 ,
6.448228 , -11.924197 , 2.171869 , 2.0423572 ,
-6.173772 , 10.778437 , 25.77281 , -4.9495463 ,
14.57806 , 0.3044315 , 2.6132357 , -7.591999 ,
-2.076944 , 9.025118 , 1.7834753 , -3.1799617 ,
-4.9401326 , 23.465864 , 5.1685796 , -9.018578 ,
9.037825 , -4.4150195 , 6.859591 , -12.274467 ,
-0.88911164, 5.186309 , -3.9988663 , -13.638606 ,
-9.925445 , -0.06329413, -3.6709652 , -12.397416 ,
-12.719869 , -1.395601 , 2.1150916 , 5.7381287 ,
-4.4691963 , -3.82819 , -0.84233856, -1.1604277 ,
-13.490127 , 8.731719 , -20.778936 , -11.495662 ,
5.8033476 , -4.752041 , 10.833007 , -6.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 , -9.501399 , 7.2315617 ,
-6.417456 , 1.3333273 , 11.872697 , -0.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8.704245 , 6.224871 , -3.2701402 , -11.508579 ],
dtype=float32)}
```bash
demo [ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268
-3.04878 1.611095 10.127234 -10.534177 -15.821609
1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228
-11.343508 2.3385992 -8.719341 14.213509 15.404744
-0.39327756 6.338786 2.688887 8.7104025 17.469526
-8.77959 7.0576906 4.648855 -1.3089896 -23.294737
8.013747 13.891729 -9.926753 5.655307 -5.9422326
-22.842539 0.6293588 -18.46266 -10.811862 9.8192625
3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942
1.7594414 -0.6485091 4.485623 2.0207152 7.264915
-6.40137 23.63524 2.9711294 -22.708025 9.93719
20.354511 -10.324688 -0.700492 -8.783211 -5.27593
15.999649 3.3004563 12.747926 15.429879 4.7849145
5.6699696 -2.3826702 10.605882 3.9112158 3.1500628
15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124
-9.224193 14.568347 -10.568833 4.982321 -4.342062
0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362
-6.680575 0.4757669 -5.035051 -6.7964664 16.865469
-11.54324 7.681869 0.44475392 9.708182 -8.932846
0.4123232 -4.361452 1.3948607 9.511665 0.11667654
2.9079323 6.049952 9.275183 -18.078873 6.2983274
-0.7500531 -2.725033 -7.6027865 3.3404543 2.990815
4.010979 11.000591 -2.8873312 7.1352735 -16.79663
18.495346 -14.293832 7.89578 2.2714825 22.976387
-4.875734 -3.0836344 -2.9999814 13.751918 6.448228
-11.924197 2.171869 2.0423572 -6.173772 10.778437
25.77281 -4.9495463 14.57806 0.3044315 2.6132357
-7.591999 -2.076944 9.025118 1.7834753 -3.1799617
-4.9401326 23.465864 5.1685796 -9.018578 9.037825
-4.4150195 6.859591 -12.274467 -0.88911164 5.186309
-3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652
-12.397416 -12.719869 -1.395601 2.1150916 5.7381287
-4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127
8.731719 -20.778936 -11.495662 5.8033476 -4.752041
10.833007 -6.717991 4.504732 13.4244375 1.1306485
7.3435574 1.400918 14.704036 -9.501399 7.2315617
-6.417456 1.3333273 11.872697 -0.30664724 8.8845
6.5569253 4.7948146 0.03662816 -8.704245 6.224871
-3.2701402 -11.508579 ]
```

- Python API
Expand All @@ -118,55 +108,45 @@ wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
Output:
```bash
# Vector Result:
{'dim': 192, 'embedding': array([ -5.749211 , 9.505463 , -8.200284 , -5.2075014 ,
5.3940268 , -3.04878 , 1.611095 , 10.127234 ,
-10.534177 , -15.821609 , 1.2032688 , -0.35080156,
1.2629458 , -12.643498 , -2.5758228 , -11.343508 ,
2.3385992 , -8.719341 , 14.213509 , 15.404744 ,
-0.39327756, 6.338786 , 2.688887 , 8.7104025 ,
17.469526 , -8.77959 , 7.0576906 , 4.648855 ,
-1.3089896 , -23.294737 , 8.013747 , 13.891729 ,
-9.926753 , 5.655307 , -5.9422326 , -22.842539 ,
0.6293588 , -18.46266 , -10.811862 , 9.8192625 ,
3.0070958 , 3.8072643 , -2.3861165 , 3.0821571 ,
-14.739942 , 1.7594414 , -0.6485091 , 4.485623 ,
2.0207152 , 7.264915 , -6.40137 , 23.63524 ,
2.9711294 , -22.708025 , 9.93719 , 20.354511 ,
-10.324688 , -0.700492 , -8.783211 , -5.27593 ,
15.999649 , 3.3004563 , 12.747926 , 15.429879 ,
4.7849145 , 5.6699696 , -2.3826702 , 10.605882 ,
3.9112158 , 3.1500628 , 15.859915 , -2.1832209 ,
-23.908653 , -6.4799504 , -4.5365124 , -9.224193 ,
14.568347 , -10.568833 , 4.982321 , -4.342062 ,
0.0914714 , 12.645902 , -5.74285 , -3.2141201 ,
-2.7173362 , -6.680575 , 0.4757669 , -5.035051 ,
-6.7964664 , 16.865469 , -11.54324 , 7.681869 ,
0.44475392, 9.708182 , -8.932846 , 0.4123232 ,
-4.361452 , 1.3948607 , 9.511665 , 0.11667654,
2.9079323 , 6.049952 , 9.275183 , -18.078873 ,
6.2983274 , -0.7500531 , -2.725033 , -7.6027865 ,
3.3404543 , 2.990815 , 4.010979 , 11.000591 ,
-2.8873312 , 7.1352735 , -16.79663 , 18.495346 ,
-14.293832 , 7.89578 , 2.2714825 , 22.976387 ,
-4.875734 , -3.0836344 , -2.9999814 , 13.751918 ,
6.448228 , -11.924197 , 2.171869 , 2.0423572 ,
-6.173772 , 10.778437 , 25.77281 , -4.9495463 ,
14.57806 , 0.3044315 , 2.6132357 , -7.591999 ,
-2.076944 , 9.025118 , 1.7834753 , -3.1799617 ,
-4.9401326 , 23.465864 , 5.1685796 , -9.018578 ,
9.037825 , -4.4150195 , 6.859591 , -12.274467 ,
-0.88911164, 5.186309 , -3.9988663 , -13.638606 ,
-9.925445 , -0.06329413, -3.6709652 , -12.397416 ,
-12.719869 , -1.395601 , 2.1150916 , 5.7381287 ,
-4.4691963 , -3.82819 , -0.84233856, -1.1604277 ,
-13.490127 , 8.731719 , -20.778936 , -11.495662 ,
5.8033476 , -4.752041 , 10.833007 , -6.717991 ,
4.504732 , 13.4244375 , 1.1306485 , 7.3435574 ,
1.400918 , 14.704036 , -9.501399 , 7.2315617 ,
-6.417456 , 1.3333273 , 11.872697 , -0.30664724,
8.8845 , 6.5569253 , 4.7948146 , 0.03662816,
-8.704245 , 6.224871 , -3.2701402 , -11.508579 ],
dtype=float32)}
[ -5.749211 9.505463 -8.200284 -5.2075014 5.3940268
-3.04878 1.611095 10.127234 -10.534177 -15.821609
1.2032688 -0.35080156 1.2629458 -12.643498 -2.5758228
-11.343508 2.3385992 -8.719341 14.213509 15.404744
-0.39327756 6.338786 2.688887 8.7104025 17.469526
-8.77959 7.0576906 4.648855 -1.3089896 -23.294737
8.013747 13.891729 -9.926753 5.655307 -5.9422326
-22.842539 0.6293588 -18.46266 -10.811862 9.8192625
3.0070958 3.8072643 -2.3861165 3.0821571 -14.739942
1.7594414 -0.6485091 4.485623 2.0207152 7.264915
-6.40137 23.63524 2.9711294 -22.708025 9.93719
20.354511 -10.324688 -0.700492 -8.783211 -5.27593
15.999649 3.3004563 12.747926 15.429879 4.7849145
5.6699696 -2.3826702 10.605882 3.9112158 3.1500628
15.859915 -2.1832209 -23.908653 -6.4799504 -4.5365124
-9.224193 14.568347 -10.568833 4.982321 -4.342062
0.0914714 12.645902 -5.74285 -3.2141201 -2.7173362
-6.680575 0.4757669 -5.035051 -6.7964664 16.865469
-11.54324 7.681869 0.44475392 9.708182 -8.932846
0.4123232 -4.361452 1.3948607 9.511665 0.11667654
2.9079323 6.049952 9.275183 -18.078873 6.2983274
-0.7500531 -2.725033 -7.6027865 3.3404543 2.990815
4.010979 11.000591 -2.8873312 7.1352735 -16.79663
18.495346 -14.293832 7.89578 2.2714825 22.976387
-4.875734 -3.0836344 -2.9999814 13.751918 6.448228
-11.924197 2.171869 2.0423572 -6.173772 10.778437
25.77281 -4.9495463 14.57806 0.3044315 2.6132357
-7.591999 -2.076944 9.025118 1.7834753 -3.1799617
-4.9401326 23.465864 5.1685796 -9.018578 9.037825
-4.4150195 6.859591 -12.274467 -0.88911164 5.186309
-3.9988663 -13.638606 -9.925445 -0.06329413 -3.6709652
-12.397416 -12.719869 -1.395601 2.1150916 5.7381287
-4.4691963 -3.82819 -0.84233856 -1.1604277 -13.490127
8.731719 -20.778936 -11.495662 5.8033476 -4.752041
10.833007 -6.717991 4.504732 13.4244375 1.1306485
7.3435574 1.400918 14.704036 -9.501399 7.2315617
-6.417456 1.3333273 11.872697 -0.30664724 8.8845
6.5569253 4.7948146 0.03662816 -8.704245 6.224871
-3.2701402 -11.508579 ]
```

### 4. Pretrained Models
Expand Down
Loading

0 comments on commit d60856b

Please sign in to comment.