Skip to content

Commit 102c84e

Browse files
author
huang18
committed
refactor until can run vae from scratch
1 parent 9452070 commit 102c84e

32 files changed

+1373
-302
lines changed

.gitignore

+3
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
data/vcc2018/wav
2+
data/vcc2018/bin
3+
data/vcc2018/stats
14
logdir
25
__init__.py
36
__pycache__

architectures/architecture-vae.json

+15-19
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,10 @@
33
"model_module": "model.vae",
44
"trainer": "VAETrainer",
55
"trainer_module": "trainer.vae",
6-
"stat_dir": "/mnt/md1/datasets/vcc2018/world/etc-new",
7-
"spklist": "/mnt/md1/datasets/vcc2018/world/etc/speakers.tsv",
6+
"stats": "./data/vcc2018/stats/stats.h5",
7+
"spklist": "./data/vcc2018/conf/spk.list",
88

9+
"feat_type": "sp",
910
"z_dim": 16,
1011
"y_dim": 12,
1112
"y_emb_dim": 16,
@@ -24,10 +25,11 @@
2425
}
2526
},
2627
"training": {
27-
"train_file_pattern": ["/mnt/md1/datasets/vcc2018/world/bin-dynamic/VAD/tr/*/[12]00[0-6]?.bin",
28-
"/mnt/md1/datasets/vcc2018/world/bin-dynamic/VAD/tr/*/[12]0070.bin"],
29-
"valid_file_pattern": ["/mnt/md1/datasets/vcc2018/world/bin-dynamic/VAD/tr/*/[12]007[1-9].bin",
30-
"/mnt/md1/datasets/vcc2018/world/bin-dynamic/VAD/tr/*/[12]008[0-1].bin"],
28+
"feat_type": "sp",
29+
"train_file_pattern": ["./data/vcc2018/bin/VAD/*/[12]00[0-6]?.bin",
30+
"./data/vcc2018/bin/VAD/*/[12]0070.bin"],
31+
"valid_file_pattern": ["./data/vcc2018/bin/VAD/*/[12]007[1-9].bin",
32+
"./data/vcc2018/bin/VAD/*/[12]008[0-1].bin"],
3133
"batch_size": 16,
3234
"crop_length": 128,
3335
"lr": 1e-4,
@@ -39,30 +41,24 @@
3941
"log_freq": 1000
4042
},
4143
"conversion": {
42-
"input": "sp",
43-
"output": "sp",
44-
"test_file_pattern": "/mnt/md1/datasets/vcc2018/world/bin-dynamic/no_VAD/ev/{}/*.bin"
44+
"test_file_pattern": "./data/vcc2018/bin/no_VAD/{}/3*.bin"
4545
},
4646
"feat_param":{
4747
"fs": 22050,
4848
"shiftms": 5,
4949
"fftl": 1024,
5050
"mcep_alpha": 0.455,
51-
"sp_dim": 513,
52-
"mcc_dim": 34,
53-
"feat_dim": 2710,
51+
"mcep_dim": 34,
52+
"feat_dim": 1064,
5453
"dim":{
5554
"sp": 513,
56-
"mcc": 34,
57-
"feat": 2710
55+
"mcep": 34,
56+
"feat": 1064
5857
}
5958
},
60-
"normalizer_files":{
59+
"normalizer":{
6160
"sp": {
62-
"type": "minmax",
63-
"dim": null,
64-
"max": "sp_max.npf",
65-
"min": "sp_min.npf"
61+
"type": ["minmax"]
6662
}
6763
}
6864
}

convert.py

+78-31
Original file line number | Diff line number | Diff line change
@@ -1,43 +1,60 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
# Convert FEATURES with trained models.
5+
# By Wen-Chin Huang 2019.06
6+
17
import json
28
import os
39

410
import tensorflow as tf
511
import numpy as np
612

7-
from datetime import datetime
813
from importlib import import_module
914

1015
import argparse
1116
import logging
1217

1318
import sys
1419
from preprocessing.vcc2018.feature_reader import Whole_feature_reader
15-
from preprocessing.normalizer import MinMaxScaler
20+
from preprocessing.normalizer import MinMaxScaler, StandardScaler
1621
from preprocessing.utils import read_hdf5, read_txt
1722
from util.wrapper import load, get_default_logdir_output
1823

1924
def main():
2025

2126
parser = argparse.ArgumentParser(
22-
description="convert files.")
27+
description="Conversion.")
2328
parser.add_argument(
2429
"--logdir", required=True, type=str,
2530
help="path of log directory")
2631
parser.add_argument(
2732
"--checkpoint", default=None, type=str,
2833
help="path of checkpoint")
34+
2935
parser.add_argument(
3036
"--src", default=None, required=True, type=str,
3137
help="source speaker")
3238
parser.add_argument(
3339
"--trg", default=None, required=True, type=str,
3440
help="target speaker")
35-
parser.add_argument('--stat_dir', type=str,
36-
default='/mnt/md1/datasets/vcc2018/world/etc-new',
37-
help='configuration directory')
38-
parser.add_argument('--file_pattern', type=str,
39-
default='/mnt/md1/datasets/vcc2018/world/bin-dynamic/no_VAD/ev/{}/*.bin',
40-
help='file pattern')
41+
parser.add_argument(
42+
"--type", default='test', type=str,
43+
help="test or valid (default is test)")
44+
45+
46+
parser.add_argument(
47+
"--input_feat", required=True,
48+
type=str, help="input feature type")
49+
parser.add_argument(
50+
"--output_feat", required=True,
51+
type=str, help="output feature type")
52+
parser.add_argument(
53+
"--mcd", action='store_true',
54+
help="calculate mcd or not")
55+
parser.add_argument(
56+
"--syn", action='store_true',
57+
help="synthesize voice or not")
4158
args = parser.parse_args()
4259

4360
# make exp directory
@@ -70,41 +87,42 @@ def main():
7087
module = import_module(arch['model_module'], package=None)
7188
MODEL = getattr(module, arch['model'])
7289

73-
input_feat = arch['conversion']['input']
90+
input_feat = args.input_feat
7491
input_feat_dim = arch['feat_param']['dim'][input_feat]
75-
output_feat = arch['conversion']['output']
92+
output_feat = args.output_feat
7693

7794
# read speakers
7895
spk_list = read_txt(arch['spklist'])
7996

8097
# Load statistics, normalize and NCHW
8198
normalizers = {}
82-
for k in arch['normalizer_files']:
83-
if (arch['normalizer_files'][k]['max'] is not None
84-
or arch['normalizer_files'][k]['max'] is not None):
85-
normalizer = MinMaxScaler(
86-
xmax=np.fromfile(os.path.join(arch['stat_dir'], arch['normalizer_files'][k]['max'])),
87-
xmin=np.fromfile(os.path.join(arch['stat_dir'], arch['normalizer_files'][k]['min'])),
88-
)
89-
normalizers[k] = normalizer
99+
for k in arch['normalizer']:
100+
normalizers[k] = {}
101+
for norm_type in arch['normalizer'][k]['type']:
102+
if norm_type == 'minmax':
103+
normalizer = MinMaxScaler(
104+
xmax=read_hdf5(arch['stats'], '/max/' + k),
105+
xmin=read_hdf5(arch['stats'], '/min/' + k),
106+
)
107+
elif norm_type == 'meanvar':
108+
normalizer = StandardScaler(
109+
mu=read_hdf5(arch['stats'], '/mean/' + k),
110+
std=read_hdf5(arch['stats'], '/scale/' + k),
111+
)
112+
113+
normalizers[k][norm_type] = normalizer
90114

91115
# Define placeholders
92116
x_pl = tf.placeholder(tf.float32, [None, input_feat_dim])
93-
if input_feat in normalizers:
94-
x = normalizers[input_feat].forward_process(x_pl)
95-
else:
96-
x = x_pl
97-
x = tf.expand_dims(tf.expand_dims(x, 1), -1)
117+
98118
yh_pl = tf.placeholder(dtype=tf.int64, shape=[1,])
99-
yh = yh_pl * tf.ones(shape=[tf.shape(x)[0],], dtype=tf.int64)
119+
yh = yh_pl * tf.ones(shape=[tf.shape(x_pl)[0],], dtype=tf.int64)
120+
yh = tf.expand_dims(yh, 0)
100121

101122
# Define model
102-
model = MODEL(arch)
103-
z = model.encode(x)
123+
model = MODEL(arch, normalizers)
124+
z, _ = model.encode(x_pl)
104125
xh = model.decode(z, yh)
105-
xh = tf.squeeze(xh)
106-
if output_feat in normalizers:
107-
xh = normalizers[output_feat].backward_process(xh)
108126

109127
# make directories for output
110128
tf.gfile.MakeDirs(os.path.join(output_dir, 'latent'))
@@ -123,7 +141,16 @@ def main():
123141
_, ckpt = os.path.split(args.checkpoint)
124142
load(saver, sess, args.logdir, ckpt=ckpt)
125143

126-
files = sorted(tf.gfile.Glob(arch['conversion']['test_file_pattern'].format(args.src)))
144+
# get feature list, either validation set or test set
145+
if args.type == 'test':
146+
files = tf.gfile.Glob(arch['conversion']['test_file_pattern'].format(args.src))
147+
elif args.type == 'valid':
148+
files = []
149+
for p in arch['training']['valid_file_pattern']:
150+
files.extend(tf.gfile.Glob(p.replace('*', args.src)))
151+
files = sorted(files)
152+
153+
# conversion
127154
for f in files:
128155
basename = os.path.split(f)[-1]
129156
path_to_latent = os.path.join(output_dir, 'latent', '{}-{}-{}'.format(args.src, args.trg, basename))
@@ -143,6 +170,26 @@ def main():
143170
fp.write(latent.tostring())
144171
with open(path_to_cvt, 'wb') as fp:
145172
fp.write(cvt.tostring())
173+
174+
# optionally calculate MCD
175+
if args.mcd:
176+
cmd = "python ./mcd_calculate.py" + \
177+
" --type " + args.type + \
178+
" --logdir " + output_dir + \
179+
" --input_feat " + input_feat + \
180+
" --output_feat " + output_feat
181+
print(cmd)
182+
os.system(cmd)
183+
184+
# optionally synthesize waveform
185+
if args.syn:
186+
cmd = "python ./synthesize.py" + \
187+
" --type " + args.type + \
188+
" --logdir " + output_dir + \
189+
" --input_feat " + input_feat + \
190+
" --output_feat " + output_feat
191+
print(cmd)
192+
os.system(cmd)
146193

147194
if __name__ == '__main__':
148195
main()

data/download.sh

+10
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash
2+
3+
# download
4+
python download_speech_corpus.py downloader_conf/vcc2018.yml
5+
6+
# change directory names
7+
mv wav vcc2018
8+
# Abort if the directory is missing; otherwise the commands that follow would run in the wrong place.
cd vcc2018/wav || exit 1
9+
# Drop the first "VCC2" from each matching directory path. Quoting and `IFS= read -r` keep paths containing spaces, globs or backslashes intact.
find . -type d -name "VCC2*" | while IFS= read -r f; do mv "$f" "$(echo "$f" | sed 's/VCC2//')"; done
10+
cd ../../

0 commit comments

Comments
 (0)