From cf324178982d5a96cfd6a8384b8dfd077d9eab50 Mon Sep 17 00:00:00 2001
From: me
Date: Mon, 18 May 2020 15:29:56 +0800
Subject: [PATCH] final

---
 .gitignore                        |  12 +++
 README.md                         |  27 ++++++
 beamdecode.py                     |  48 +++++++++++
 data.py                           |  97 +++++++++++++++++++++
 decoder.py                        | 135 ++++++++++++++++++++++++++++++
 docs/compare.md                   |  71 ++++++++++++++++
 docs/demo.md                      |  12 +++
 docs/embedding.md                 |  34 ++++++++
 docs/lm.md                        |  94 +++++++++++++++++++++
 docs/train.md                     |  67 +++++++++++++++
 examples/_init_path.py            |  14 ++++
 examples/demo-client.py           |  18 ++++
 examples/demo-recognize.py        |  10 +++
 examples/demo-record-recognize.py |  13 +++
 examples/demo-server.py           |  23 +++++
 examples/embedding.py             |  54 ++++++++++++
 examples/record.py                |  39 +++++++++
 examples/train.py                 |   8 ++
 feature.py                        |  36 ++++++++
 images/embedding.png              | Bin 0 -> 37969 bytes
 images/train.svg                  |   1 +
 models/__init__.py                |   0
 models/base.py                    |  44 ++++++++++
 models/conv.py                    |  68 +++++++++++++++
 models/trainable.py               | 127 ++++++++++++++++++++++++++++
 requirements.txt                  |   3 +
 train.py                          | 118 ++++++++++++++++++++++++++
 27 files changed, 1173 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 beamdecode.py
 create mode 100644 data.py
 create mode 100644 decoder.py
 create mode 100644 docs/compare.md
 create mode 100644 docs/demo.md
 create mode 100644 docs/embedding.md
 create mode 100644 docs/lm.md
 create mode 100644 docs/train.md
 create mode 100644 examples/_init_path.py
 create mode 100644 examples/demo-client.py
 create mode 100644 examples/demo-recognize.py
 create mode 100644 examples/demo-record-recognize.py
 create mode 100644 examples/demo-server.py
 create mode 100644 examples/embedding.py
 create mode 100644 examples/record.py
 create mode 100644 examples/train.py
 create mode 100644 feature.py
 create mode 100644 images/embedding.png
 create mode 100644 images/train.svg
 create mode 100644 models/__init__.py
 create mode 100644 models/base.py
 create mode 100644 models/conv.py
 create mode 100644 models/trainable.py
 create mode 100644 requirements.txt
 create mode 100644 train.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2d465ea
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+*
+!examples
+!models
+!*.py
+!.gitignore
+!*.md
+!requirements.txt
+!docs/
+!images
+!*.svg
+!*.png
+!LICENSE*
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5a7f885
--- /dev/null
+++ b/README.md
@@ -0,0 +1,27 @@
+# MASR Chinese Speech Recognition
+
+**MASR** is a **Mandarin Chinese speech recognition** project built on an **end-to-end deep neural network**.
+
+## How it works
+
+MASR uses a Gated Convolutional Network, an architecture similar to Wav2letter, which Facebook proposed in 2016. The difference is the activation function: instead of `ReLU` or `HardTanh`, MASR uses the `GLU` (gated linear unit), hence the name. In my experiments, `GLU` converges faster than `HardTanh`. If you want to study how convolutional networks behave on speech recognition, this project can serve as a reference.
+
+**Model quality below is measured by the character error rate (CER): CER = edit distance / sentence length; lower is better.**
+
+**Roughly speaking, 1 - CER is the recognition accuracy.**
+
+The model is trained on the AISHELL-1 dataset: 150 hours of recordings covering more than 4,000 Chinese characters. **Industrial speech recognition systems are usually trained on at least 10x as much audio, plus a language model trained on domain-specific text**, so do not expect this project to rival industrial-grade accuracy. That is unrealistic for any personal project on GitHub, unless fundamentally better techniques come along.
+
+*What is a language model trained on domain-specific text? Take in-game voice input: it is biased toward transcribing what you would plausibly say while gaming, e.g. 「貂蝉被蓝打死了」 ("Diaochan was killed by Lan"). In any other context that is not even a coherent sentence. Say it to someone who has read Romance of the Three Kingdoms but never played Honor of Kings, and they will surely ask: "What? Who killed Diaochan? Who is Lan?"*
+
+On a single GTX 1080Ti, one training epoch takes about 20 minutes. (The lab machine runs an old CUDA version; a newer CUDA might well be faster.)
+
+![train](images/train.svg)
+
+The figure above plots validation-set CER against training epochs. Validation CER has come down to 11%.
+
+The test set is not shown in the figure; its CER is slightly higher, around 14%.
+
+Plugging in an external language model brings the test-set CER down to 8%.
+
+The pretrained model shipped with this project was saved at roughly epoch 100, close to the best checkpoint.
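+
+## Quick example
+
+A minimal sketch of offline recognition with the pretrained model (this assumes the checkpoint has been downloaded to `pretrained/gated-conv.pth` and that `test.wav` is a 16000 Hz mono recording; see `examples/demo-recognize.py`):
+
+```python
+from models.conv import GatedConv
+
+model = GatedConv.load("pretrained/gated-conv.pth")  # restores weights and config
+print(model.predict("test.wav"))  # greedy CTC decoding, no language model
+```
\ No newline at end of file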
diff --git a/beamdecode.py b/beamdecode.py
new file mode 100644
index 0000000..6c97d25
--- /dev/null
+++ b/beamdecode.py
@@ -0,0 +1,48 @@
+import _init_path
+import torch
+import feature
+from models.conv import GatedConv
+import torch.nn.functional as F
+from ctcdecode import CTCBeamDecoder
+
+alpha = 0.8  # language model weight
+beta = 0.3  # word insertion bonus
+lm_path = "lm/zh_giga.no_cna_cmn.prune01244.klm"
+cutoff_top_n = 40
+cutoff_prob = 1.0
+beam_width = 32
+num_processes = 4
+blank_index = 0
+
+model = GatedConv.load("pretrained/gated-conv.pth")
+model.eval()
+
+decoder = CTCBeamDecoder(
+    model.vocabulary,
+    lm_path,
+    alpha,
+    beta,
+    cutoff_top_n,
+    cutoff_prob,
+    beam_width,
+    num_processes,
+    blank_index,
+)
+
+
+def translate(vocab, out, out_len):
+    return "".join([vocab[x] for x in out[0:out_len]])
+
+
+def predict(f):
+    wav = feature.load_audio(f)
+    spec = feature.spectrogram(wav)
+    spec.unsqueeze_(0)  # add batch dimension: 1 * F * T
+    with torch.no_grad():
+        y = model.cnn(spec)
+        y = F.softmax(y, 1)
+        y_len = torch.tensor([y.size(-1)])
+        y = y.permute(0, 2, 1)  # B * T * V
+        print("decoding")
+        out, score, offset, out_len = decoder.decode(y, y_len)
+        return translate(model.vocabulary, out[0][0], out_len[0][0])
diff --git a/data.py b/data.py
new file mode 100644
index 0000000..ba8037a
--- /dev/null
+++ b/data.py
@@ -0,0 +1,97 @@
+import torch
+import librosa
+import wave
+import numpy as np
+import json
+from torch.utils.data import DataLoader
+from torch.utils.data import Dataset
+
+sample_rate = 16000
+window_size = 0.02
+window_stride = 0.01
+n_fft = int(sample_rate * window_size)
+win_length = n_fft
+hop_length = int(sample_rate * window_stride)
+window = "hamming"
+
+
+def load_audio(wav_path, normalize=True):  # -> numpy array
+    with wave.open(wav_path) as wav:
+        wav = np.frombuffer(wav.readframes(wav.getnframes()), dtype="int16")
+        wav = wav.astype("float")
+    if normalize:
+        return (wav - wav.mean()) / wav.std()
+    else:
+        return wav
+
+
+def spectrogram(wav, normalize=True):
+    D = librosa.stft(
+        wav, n_fft=n_fft, hop_length=hop_length, win_length=win_length, window=window
+    )
+
+    spec, phase = librosa.magphase(D)
+    spec = np.log1p(spec)
+    spec = torch.FloatTensor(spec)
+
+    if normalize:
+        spec = (spec - spec.mean()) / spec.std()
+
+    return spec
+
+
+class MASRDataset(Dataset):
+    def __init__(self, index_path, labels_path):
+        with open(index_path) as f:
+            idx = f.readlines()
+        idx = [x.strip().split(",", 1) for x in idx]
+        self.idx = idx
+        # labels_path is normally a labels.json file; models/trainable.py also
+        # passes the vocabulary (a string of characters) directly, so accept both
+        if isinstance(labels_path, str) and labels_path.endswith(".json"):
+            with open(labels_path) as f:
+                labels = json.load(f)
+        else:
+            labels = labels_path
+        self.labels = dict([(labels[i], i) for i in range(len(labels))])
+        self.labels_str = labels
+
+    def __getitem__(self, index):
+        wav, transcript = self.idx[index]
+        wav = load_audio(wav)
+        spect = spectrogram(wav)
+        # drop characters that are missing from the vocabulary
+        transcript = list(filter(None, [self.labels.get(x) for x in transcript]))
+
+        return spect, transcript
+
+    def __len__(self):
+        return len(self.idx)
+
+
+def _collate_fn(batch):
+    def func(p):
+        return p[0].size(1)
+
+    # sort by spectrogram length, longest first
+    batch = sorted(batch, key=lambda sample: sample[0].size(1), reverse=True)
+    longest_sample = max(batch, key=func)[0]
+    freq_size = longest_sample.size(0)
+    minibatch_size = len(batch)
+    max_seqlength = longest_sample.size(1)
+    inputs = torch.zeros(minibatch_size, freq_size, max_seqlength)
+    input_lens = torch.IntTensor(minibatch_size)
+    target_lens = torch.IntTensor(minibatch_size)
+    targets = []
+    for x in range(minibatch_size):
+        sample = batch[x]
+        tensor = sample[0]
+        target = sample[1]
+        seq_length = tensor.size(1)
+        inputs[x].narrow(1, 0, seq_length).copy_(tensor)
+        input_lens[x] = seq_length
+        target_lens[x] = len(target)
+        targets.extend(target)
+    # all targets are flattened into one 1-D tensor, as warp-ctc expects
+    targets = torch.IntTensor(targets)
+    return inputs, targets, input_lens, target_lens
+
+
+class MASRDataLoader(DataLoader):
+    def __init__(self, *args, **kwargs):
+        super(MASRDataLoader, self).__init__(*args, **kwargs)
+        self.collate_fn = _collate_fn
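+
+
+if __name__ == "__main__":
+    # Minimal smoke test of the data pipeline -- a sketch that assumes an index
+    # file and a labels.json as described in docs/train.md (adjust the paths).
+    dataset = MASRDataset("train.index", "labels.json")
+    loader = MASRDataLoader(dataset, batch_size=4, num_workers=0)
+    x, y, x_lens, y_lens = next(iter(loader))
+    # x: padded spectrograms (B, F, T_max); y: all target indices flattened
+    # into one 1-D IntTensor, split per utterance by y_lens (warp-ctc layout)
+    print(x.shape, y.shape, x_lens, y_lens)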
diff --git a/decoder.py b/decoder.py
new file mode 100644
index 0000000..345d5f5
--- /dev/null
+++ b/decoder.py
@@ -0,0 +1,135 @@
+import Levenshtein as Lev
+import torch
+from six.moves import xrange
+
+
+class Decoder(object):
+    """
+    Basic decoder class from which all other decoders inherit. Implements several
+    helper functions. Subclasses should implement the decode() method.
+
+    Arguments:
+        labels (string): mapping from integers to characters.
+        blank_index (int, optional): index for the blank '_' character. Defaults to 0.
+    """
+
+    def __init__(self, labels, blank_index=0):
+        # e.g. labels = "_'ABCDEFGHIJKLMNOPQRSTUVWXYZ#"
+        self.labels = labels
+        self.int_to_char = dict([(i, c) for (i, c) in enumerate(labels)])
+        self.blank_index = blank_index
+        """
+        space_index = len(labels)  # To prevent errors in decode, we add an out of bounds index for the space
+        if ' ' in labels:
+            space_index = labels.index(' ')
+        self.space_index = space_index
+        """
+
+    def wer(self, s1, s2):
+        """
+        Computes the Word Error Rate, defined as the edit distance between the
+        two provided sentences after tokenizing to words.
+        Arguments:
+            s1 (string): space-separated sentence
+            s2 (string): space-separated sentence
+        """
+
+        # build mapping of words to integers
+        b = set(s1.split() + s2.split())
+        word2char = dict(zip(b, range(len(b))))
+
+        # map the words to a char array (Levenshtein packages only accepts
+        # strings)
+        w1 = [chr(word2char[w]) for w in s1.split()]
+        w2 = [chr(word2char[w]) for w in s2.split()]
+
+        return Lev.distance("".join(w1), "".join(w2))
+
+    def cer(self, s1, s2):
+        """
+        Computes the Character Error Rate, defined as the edit distance
+        between the two provided sentences after removing spaces.
+
+        Arguments:
+            s1 (string): sentence
+            s2 (string): sentence
+        """
+        s1, s2 = s1.replace(" ", ""), s2.replace(" ", "")
+        return Lev.distance(s1, s2)
+
+    def decode(self, probs, sizes=None):
+        """
+        Given a matrix of character probabilities, returns the decoder's
+        best guess of the transcription
+
+        Arguments:
+            probs: Tensor of character probabilities, where probs[c,t]
+                is the probability of character c at time t
+            sizes(optional): Size of each sequence in the mini-batch
+        Returns:
+            string: sequence of the model's best guess for the transcription
+        """
+        raise NotImplementedError
+
+
+class GreedyDecoder(Decoder):
+    def __init__(self, labels, blank_index=0):
+        super(GreedyDecoder, self).__init__(labels, blank_index)
+
+    def convert_to_strings(
+        self, sequences, sizes=None, remove_repetitions=False, return_offsets=False
+    ):
+        """Given a list of numeric sequences, returns the corresponding strings"""
+        strings = []
+        offsets = [] if return_offsets else None
+        for x in xrange(len(sequences)):
+            seq_len = sizes[x] if sizes is not None else len(sequences[x])
+            string, string_offsets = self.process_string(
+                sequences[x], seq_len, remove_repetitions
+            )
+            strings.append([string])  # We only return one path
+            if return_offsets:
+                offsets.append([string_offsets])
+        if return_offsets:
+            return strings, offsets
+        else:
+            return strings
+
+    def process_string(self, sequence, size, remove_repetitions=False):
+        string = ""
+        offsets = []
+        for i in range(size):
+            char = self.int_to_char[sequence[i].item()]
+            if char != self.int_to_char[self.blank_index]:
+                # if this char is a repetition and remove_repetitions=true, then skip
+                if (
+                    remove_repetitions
+                    and i != 0
+                    and char == self.int_to_char[sequence[i - 1].item()]
+                ):
+                    pass
+                else:
+                    string = string + char
+                    offsets.append(i)
+        return string, torch.tensor(offsets, dtype=torch.int)
+
+    def decode(self, probs, sizes=None):
+        """
+        Returns the argmax decoding given the probability matrix. Removes
+        repeated elements in the sequence, as well as blanks.
+
+        Arguments:
+            probs: Tensor of character probabilities from the network. Expected shape of batch x seq_length x output_dim
+            sizes(optional): Size of each sequence in the mini-batch
+        Returns:
+            strings: sequences of the model's best guess for the transcription on inputs
+            offsets: time step per character predicted
+        """
+        _, max_probs = torch.max(probs, 2)
+        strings, offsets = self.convert_to_strings(
+            max_probs.view(max_probs.size(0), max_probs.size(1)),
+            sizes,
+            remove_repetitions=True,
+            return_offsets=True,
+        )
+        return strings, offsets
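+
+
+if __name__ == "__main__":
+    # Tiny self-contained demo of greedy decoding, using a toy 4-character
+    # vocabulary where "_" is the CTC blank at index 0.
+    decoder = GreedyDecoder("_你好吗")
+    # fake per-frame argmax path: blank, 你, 你, blank, 好 -> "你好"
+    probs = torch.eye(4)[[0, 1, 1, 0, 2]].unsqueeze(0)  # B=1, T=5, V=4
+    strings, offsets = decoder.decode(probs)
+    print(strings)  # [['你好']]
+    print(decoder.cer("你好", "你好吗"))  # plain edit distance: 1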
diff --git a/docs/compare.md b/docs/compare.md
new file mode 100644
index 0000000..1f4b2b2
--- /dev/null
+++ b/docs/compare.md
@@ -0,0 +1,71 @@
+# MASR vs. other GitHub projects
+
+The quality of Chinese speech recognition projects on GitHub varies widely. This page is meant as a reference for choosing one.
+
+It compares MASR with the following high-star Chinese speech recognition projects on GitHub:
+
+1. ASRT: https://github.com/nl8590687/ASRT_SpeechRecognition
+2. Project 2: https://github.com/xxbb1234021/speech_recognition
+3. DeepSpeech: https://github.com/PaddlePaddle/DeepSpeech
+
+Before the comparison itself, a few concepts are worth clarifying.
+
+## Training set and test set
+
+In general, a speech recognition model's quality on the training set, the test set, and in real use satisfies:
+
+* training-set quality > test-set quality > real-world quality
+
+Wherever recognition quality is mentioned below, it refers to test-set quality.
+
+The test set used by this project is the AISHELL-1 test set.
+
+Real-world quality will always be lower than test-set quality.
+
+## Compared with ASRT
+
+|                            | ASRT                 | MASR                                                        |
+| -------------------------- | -------------------- | ----------------------------------------------------------- |
+| Pretrained model provided  | Yes                  | Yes                                                         |
+| Pretrained model size      | **20MB**             | 120MB                                                       |
+| Recognition quality        | Pinyin accuracy: 80% | **Character accuracy: 86%** (**92%** with a language model) |
+| Framework                  | keras                | pytorch                                                     |
+| End-to-end                 | Yes[?]               | **Yes**                                                     |
+| Record-and-recognize demo  | Yes                  | Yes                                                         |
+
+* ASRT's pretrained model is small and quick to download; that is a genuine advantage.
+* Character accuracy is a stricter metric than pinyin accuracy.
+* Although ASRT claims to be end-to-end, it actually goes speech -> pinyin, then pinyin -> characters.
+* Accuracy here means (1 - character error rate).
+
+## Compared with Project 2
+
+Project 2 used the thchs-30 *test set* as its training data -- only **6 hours** of audio. The result is pure overfitting with no generalization ability; simply put, **it is unusable**.
+
+The results shown on Project 2's front page come from the "test set" it was **already trained on**, which is meaningless.
+
+Its issue page is mostly beginners who fell into that trap.
+
+|                            | Project 2                           | MASR                                                        |
+| -------------------------- | ----------------------------------- | ----------------------------------------------------------- |
+| Pretrained model provided  | Buried in the issues, but findable  | **Yes**                                                     |
+| Pretrained model size      | 110MB                               | 120MB                                                       |
+| Recognition quality        | Unable to recognize speech          | **Character accuracy: 86%** (**92%** with a language model) |
+
+## Compared with DeepSpeech
+
+DeepSpeech is Baidu's official project. Its quality is indeed far above personal projects; I have tested it myself. If you are reasonably experienced, I recommend it.
+
+DeepSpeech provides pretrained models for both Chinese and English. For Chinese there are two: one trained on AISHELL-1 (like this project), and one trained on Baidu's internal 1200-hour Chinese dataset (the power of money). Baidu reports the latter's accuracy on its internal test set: roughly **87%**.
+
+|                            | DeepSpeech                  | MASR                     |
+| -------------------------- | --------------------------- | ------------------------ |
+| Framework                  | paddlepaddle                | pytorch                  |
+| Pretrained model provided  | Yes                         | Yes                      |
+| Architecture               | CNN + RNN(GRU)              | CNN only                 |
+| Pretrained model size      | 750MB                       | **120MB**                |
+| Pretraining data           | **1200 hours**              | 150 hours                |
+| External language model    | **Yes**                     | No                       |
+| Recognition quality        | **87% (internal test set)** | 92% (AISHELL-1 test set) |
+
+Note: Baidu's **87%** on its internal test set is a better result than this project's **92%** on AISHELL-1, because Baidu's test set is far more diverse. My personal guess is that Baidu's pretrained model could reach about 95% on AISHELL-1, but no such number has been published.
\ No newline at end of file
diff --git a/docs/demo.md b/docs/demo.md
new file mode 100644
index 0000000..aa56764
--- /dev/null
+++ b/docs/demo.md
@@ -0,0 +1,12 @@
+# Recognizing your own speech
+
+Recognizing your own voice requires one extra dependency: pyaudio.
+
+Install it following the [pyaudio homepage](https://people.csail.mit.edu/hubert/pyaudio/), then run:
+
+```sh
+python examples/demo-record-recognize.py
+```
+
+Start speaking after the 「录音中」 (recording) prompt appears; you have 5 seconds of speaking time (changeable in the source).
+
diff --git a/docs/embedding.md b/docs/embedding.md
new file mode 100644
index 0000000..c73e4e3
--- /dev/null
+++ b/docs/embedding.md
@@ -0,0 +1,34 @@
+# Finding similar-sounding characters with acoustic embeddings
+
+You may have heard of **word embeddings**. As the bottom layer of neural language models, a word embedding maps words into a low-dimensional continuous space in which related or similar words end up with small cosine distances, so comparing this distance finds a word's near-synonyms.
+
+We apply the same idea to embed Chinese characters into a (fairly) low-dimensional continuous space where similar-sounding characters have small cosine distances, and use that distance to look up homophone-like characters. As the figure shows, the 5 characters closest to 「掉」 in the embedding space do indeed sound similar to it.
+
+![acoustic embedding](../images/embedding.png)
+
+## Getting acoustic embeddings out of MASR
+
+**Do we need to train a new model?**
+
+**No!**
+
+Using MASR's pretrained model, we can construct such an acoustic embedding directly.
+
+MASR's output layer has 4334 units, representing 4333 Chinese characters (strictly speaking, not all of them are characters) plus 1 special CTC token.
+
+The hidden layer right before it has 1000 units.
+
+They are connected by a $4334 \times 1000$ matrix $W_{ij}$; for the $i$-th character, we simply take the row vector $W_i$ as its embedding.
+
+Why does this work? I'll leave that for you to think about~
+
+Acoustic embeddings give us an intuitive way to judge whether an acoustic model is reasonable: you will find that, most of the time, the similar-sounding characters suggested by MASR's pretrained model are quite accurate.
+
+## Try it yourself
+
+Run the script below and try a few characters to see how well it does.
+
+```sh
+python examples/embedding.py
+```
+
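+For reference, the core of `examples/embedding.py` boils down to the following (with `model` being the loaded pretrained `GatedConv`; `model.cnn[10]` is the final convolution mapping the 1000 hidden units to the 4334 output units):
+
+```python
+from torch.nn.utils import remove_weight_norm
+
+conv = model.cnn[10]  # final 1x1 convolution: 1000 hidden units -> 4334 outputs
+remove_weight_norm(conv)  # fold weight_norm back into conv.weight
+embed = conv.weight.squeeze().detach()  # the 4334 x 1000 embedding matrix W
+
+
+def cos(e1, e2):  # cosine similarity between two rows of W
+    return (e1 * e2).sum() / (e1.norm() * e2.norm())
+```
+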
diff --git a/docs/lm.md b/docs/lm.md
new file mode 100644
index 0000000..732108a
--- /dev/null
+++ b/docs/lm.md
@@ -0,0 +1,94 @@
+# Improving accuracy with a language model
+
+I understand: seeing "你好很高兴认识你" ("Hello, nice to meet you") come out as "利好很高性任实米" must be disappointing. You may even feel cheated: how could this be the best-performing personal project on GitHub?
+
+Other projects' front pages may show flawless transcriptions, which looks impressive. What they show are selected samples from the test set, and the test set follows the same distribution as the training set, so of course the results look good. This project reaches 85% accuracy on the test set (without a language model), which is nothing to hide. Some projects even showcase training-set results, which is like sitting an exam made of questions you have already done -- meaningless.
+
+Of course, the point of all this is not to tell you that this is as good as it gets. No! **We can get "你好很高兴认识你" recognized perfectly, and push test-set accuracy to 92% or even higher.** What we need is a good language model.
+
+## What is a language model
+
+As you can see, "利好很高性任实米" is not a coherent sentence at all, but our neural network has no idea. Why? Believe it or not: because nobody ever said "你好" to it. The training set contains neither "你好" nor "很高兴认识你", so the network does not know these are reasonable sentences (at least more reasonable than "利好很高性任实米").
+
+A language model, on the other hand, is trained on an external text corpus containing a huge number of sentences; it knows that "你好很高兴认识你" looks far more like a sentence than "利好很高性任实米". Put differently: the network output "利" as the first character, but "你" also had a fairly high probability, and the language model can correct the mistake and say the right output is "你".
+
+Below I will walk you through adding a language model step by step; once done, the recognition quality will be a pleasant surprise. **Don't worry -- no code needs to be written.**
+
+## Adding the language model
+
+Before you try this, make sure `pyaudio` is installed; see [Recognizing your own speech](demo.md).
+
+You also need Flask, which is easy: `pip install flask`.
+
+Now let's plug a language model from Baidu into this project.
+
+A ready-made, usable language model comes from Baidu; you need to [download it](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm).
+
+This step is easy, since downloads from Baidu are fast.
+
+Once downloaded, run
+
+```sh
+cd masr
+mkdir lm/
+cp ~/Downloads/zh_giga.no_cna_cmn.prune01244.klm lm/
+```
+
+to copy the language model into the `masr/lm` directory.
+
+Now for the most troublesome step -- brace yourself.
+
+We need to install [ctcdecode](https://github.com/parlance/ctcdecode), an efficient beam search decoder.
+
+Going by its README, installation should be simple:
+
+```sh
+git clone --recursive https://github.com/parlance/ctcdecode.git
+cd ctcdecode
+pip install .
+```
+
+But it may fail with errors: during installation it downloads two files hosted on Google's servers, which you may only be able to reach through a proxy -- and your shell may not route through it automatically.
+
+Here is the download section of its `build.py`:
+
+```python
+# Download/Extract openfst, boost
+download_extract('https://sites.google.com/site/openfst/home/openfst-down/openfst-1.6.7.tar.gz',
+                 'third_party/openfst-1.6.7.tar.gz')
+download_extract('https://dl.bintray.com/boostorg/release/1.67.0/source/boost_1_67_0.tar.gz',
+                 'third_party/boost_1_67_0.tar.gz')
+```
+
+Download these two files yourself, extract them into the `third_party` directory, and comment out those two lines. Re-run the install command above and the installation will succeed.
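+
+With `ctcdecode` and the KenLM file in place, the beam search itself is handled by `beamdecode.py`. For reference, the decoder it builds boils down to this (the values are the ones hard-coded in `beamdecode.py`; `alpha` weights the language model and `beta` rewards word insertions):
+
+```python
+from ctcdecode import CTCBeamDecoder
+
+decoder = CTCBeamDecoder(
+    model.vocabulary,
+    "lm/zh_giga.no_cna_cmn.prune01244.klm",  # the KenLM model downloaded above
+    alpha=0.8,      # language model weight
+    beta=0.3,       # word insertion bonus
+    beam_width=32,
+    blank_id=0,
+)
+```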
+
+Done! Congratulations, all dependencies are installed. Now start the server:
+
+```sh
+python examples/demo-server.py
+```
+
+Then change the ip part of the server address in `examples/demo-client.py` to your server's ip. If everything runs on one machine, no change is needed; the default `localhost` works.
+
+## Enjoy
+
+Now run
+
+```sh
+python examples/demo-client.py
+```
+
+and start speaking after the 「录音中」 (recording) prompt.
+
+If nothing comes to mind, try reciting 「举头望明月,低头思故乡」.
+
+And now, the moment of truth:
+
+```
+(python) ➜  masr git:(master) ✗ python examples/demo-client.py
+录音中(5s)
+..................................................
+识别结果:
+我想找一个漂亮的女朋友
+```
+
diff --git a/docs/train.md b/docs/train.md
new file mode 100644
index 0000000..74af65a
--- /dev/null
+++ b/docs/train.md
@@ -0,0 +1,67 @@
+# Training a MASR model
+
+MASR is built on pytorch, and `MASRModel` is a subclass of `torch.nn.Module`, which should be very convenient for anyone familiar with `pytorch`.
+
+Training with MASR requires the extra dependencies below. Since you have read this far, you can surely install them yourself!
+
+* `levenshtein-python`
+
+  Edit distance for computing CER
+
+* `warpctc_pytorch`
+
+  pytorch bindings for Baidu's high-performance CTC forward/backward implementation
+
+* `tqdm`
+
+  Progress display
+
+* `tensorboardX`
+
+  tensorboard support for pytorch
+
+* `tensorboard`
+
+  Live view of training curves
+
+You will also want a GPU, of course; otherwise training will be very slow.
+
+**Training a neural network is usually much harder than building one, but MASR takes care of the complicated parts for you and makes training very convenient.**
+
+If you just want MASR's built-in gated convolutional network `GatedConv`, start by constructing a `GatedConv` object:
+
+```python
+from models.conv import GatedConv
+
+model = GatedConv(vocabulary)
+```
+
+You need to pass it `vocabulary`, a string containing every Chinese character in your dataset. Note that `vocabulary[0]` should be set to a dummy character: it represents the CTC blank label.
+
+Then call `to_train` to turn `model` into a trainable object:
+
+```python
+model.to_train()
+```
+
+Now that `model` is trainable, train it with the `fit` method:
+
+```python
+model.fit('train.index', 'dev.index', epochs=10)
+```
+
+`epochs` is how many passes to train for, and `train.index` and `dev.index` should be the index files of the training set and the development set (a validation or test set), respectively.
+
+An index file has this simple format (a sketch of how to generate one follows below):
+
+```
+/path/to/audio/file0.wav,我爱你
+/path/to/audio/file1.wav,你爱我吗
+...
+```
+
+Each line is the audio file path on the left and its transcript on the right, separated by an ASCII comma.
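+
+For example, a minimal sketch that builds such an index from wav files with known transcripts (the `transcripts` dict and the `wav` directory are hypothetical -- adapt them to your dataset):
+
+```python
+import os
+
+transcripts = {"file0.wav": "我爱你", "file1.wav": "你爱我吗"}  # your own mapping
+
+with open("train.index", "w") as f:
+    for name, text in transcripts.items():
+        path = os.path.abspath(os.path.join("wav", name))
+        f.write("{},{}\n".format(path, text))
+```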
+``` + +左边是音频文件路径,右边是对应的标注,用逗号(英文逗号)分隔。 + +`model.fit`方法还包含学习率、batch size、梯度裁剪等等参数,可以根据需要调整,建议使用默认参数。 + +完整的训练流程参见[train.py](/examples/train.py)。 \ No newline at end of file diff --git a/examples/_init_path.py b/examples/_init_path.py new file mode 100644 index 0000000..76672a7 --- /dev/null +++ b/examples/_init_path.py @@ -0,0 +1,14 @@ +import os.path +import sys + + +def add_path(path): + if path not in sys.path: + sys.path.insert(0, path) + + +this_dir = os.path.dirname(__file__) + +# Add project path to PYTHONPATH +proj_path = os.path.join(this_dir, "..") +add_path(proj_path) diff --git a/examples/demo-client.py b/examples/demo-client.py new file mode 100644 index 0000000..5f4f883 --- /dev/null +++ b/examples/demo-client.py @@ -0,0 +1,18 @@ +import requests +import _init_path +import feature +from record import record + +server = "http://localhost:5000/recognize" + +record("record.wav", time=5) # modify time to how long you want + +f = open("record.wav", "rb") + +files = {"file": f} + +r = requests.post(server, files=files) + +print("") +print("识别结果:") +print(r.text) diff --git a/examples/demo-recognize.py b/examples/demo-recognize.py new file mode 100644 index 0000000..bafb0ed --- /dev/null +++ b/examples/demo-recognize.py @@ -0,0 +1,10 @@ +import _init_path +from models.conv import GatedConv + +model = GatedConv.load("pretrained/gated-conv.pth") + +text = model.predict("test.wav") + +print("") +print("识别结果:") +print(text) diff --git a/examples/demo-record-recognize.py b/examples/demo-record-recognize.py new file mode 100644 index 0000000..3089155 --- /dev/null +++ b/examples/demo-record-recognize.py @@ -0,0 +1,13 @@ +import _init_path +from models.conv import GatedConv +from record import record + +model = GatedConv.load("pretrained/gated-conv.pth") + +record("record.wav", time=5) # modify time to how long you want + +text = model.predict("record.wav") + +print("") +print("识别结果:") +print(text) diff --git a/examples/demo-server.py b/examples/demo-server.py new file mode 100644 index 0000000..9636c26 --- /dev/null +++ b/examples/demo-server.py @@ -0,0 +1,23 @@ +from flask import Flask, request +import _init_path +from models.conv import GatedConv +import sys +import json + +print("Loading model...") + +import beamdecode + +print("Model loaded") + +app = Flask(__name__) + + +@app.route("/recognize", methods=["POST"]) +def recognize(): + f = request.files["file"] + f.save("test.wav") + return beamdecode.predict("test.wav") + + +app.run("0.0.0.0", debug=True) diff --git a/examples/embedding.py b/examples/embedding.py new file mode 100644 index 0000000..e88014c --- /dev/null +++ b/examples/embedding.py @@ -0,0 +1,54 @@ +import _init_path +from models.conv import GatedConv +import numpy as np +import torch +import heapq +from torch.nn.utils import remove_weight_norm + +torch.set_grad_enabled(False) + +model = GatedConv.load("pretrained/gated-conv.pth") +model.eval() + +conv = model.cnn[10] +remove_weight_norm(conv) + + +w = conv.weight.squeeze().detach() +b = conv.bias.unsqueeze(1).detach() + +embed = w + +vocab = model.vocabulary +v = dict((vocab[i], i) for i in range(len(vocab))) + + +def cos(c1, c2): + e1, e2 = embed[v[c1]], embed[v[c2]] + return (e1 * e2).sum() / (e1.norm() * e2.norm()) + + +def nearest(c, n=5): + def gen(): + for c_ in v: + if c_ == c: + continue + yield cos(c, c_), c_ + + return heapq.nlargest(n, gen()) + + +def main(): + while True: + c = input("请输入一个汉字:") + if c not in v: + print(f"词汇表里没有「{c}」") + continue + print("以下是cos相似度最高的") + for p, c in nearest(c): + print(c, 
end=", ") + print("") + + +if __name__ == "__main__": + main() diff --git a/examples/record.py b/examples/record.py new file mode 100644 index 0000000..5186842 --- /dev/null +++ b/examples/record.py @@ -0,0 +1,39 @@ +import pyaudio +import wave + +framerate = 16000 +NUM_SAMPLES = 2000 +channels = 1 +sampwidth = 2 +TIME = 10 + + +def save_wave_file(filename, data): + wf = wave.open(filename, "wb") + wf.setnchannels(channels) + wf.setsampwidth(sampwidth) + wf.setframerate(framerate) + wf.writeframes(b"".join(data)) + wf.close() + + +def record(f, time=5): + p = pyaudio.PyAudio() + stream = p.open( + format=pyaudio.paInt16, + channels=1, + rate=framerate, + input=True, + frames_per_buffer=NUM_SAMPLES, + ) + my_buf = [] + count = 0 + print("录音中(5s)") + while count < TIME * time: + string_audio_data = stream.read(NUM_SAMPLES) + my_buf.append(string_audio_data) + count += 1 + print(".", end="", flush=True) + + save_wave_file(f, my_buf) + stream.close() diff --git a/examples/train.py b/examples/train.py new file mode 100644 index 0000000..94e7286 --- /dev/null +++ b/examples/train.py @@ -0,0 +1,8 @@ +import _init_path +from models.conv import GatedConv + +model = GatedConv.load("pretrained/gated-conv.pth") + +model.to_train() + +model.fit("train.manifest", "train.manifest") diff --git a/feature.py b/feature.py new file mode 100644 index 0000000..0193b16 --- /dev/null +++ b/feature.py @@ -0,0 +1,36 @@ +import librosa +import wave +import numpy as np +import torch + +sample_rate = 16000 +window_size = 0.02 +window_stride = 0.01 +n_fft = int(sample_rate * window_size) +win_length = n_fft +hop_length = int(sample_rate * window_stride) +window = "hamming" + + +def load_audio(wav_path, normalize=True): # -> numpy array + with wave.open(wav_path) as wav: + wav = np.frombuffer(wav.readframes(wav.getnframes()), dtype="int16") + wav = wav.astype("float") + if normalize: + wav = (wav - wav.mean()) / wav.std() + return wav + + +def spectrogram(wav, normalize=True): + D = librosa.stft( + wav, n_fft=n_fft, hop_length=hop_length, win_length=win_length, window=window + ) + + spec, phase = librosa.magphase(D) + spec = np.log1p(spec) + spec = torch.FloatTensor(spec) + + if normalize: + spec = (spec - spec.mean()) / spec.std() + + return spec diff --git a/images/embedding.png b/images/embedding.png new file mode 100644 index 0000000000000000000000000000000000000000..893f07acdbdf57b0914ed330ee2c26a95915587b GIT binary patch literal 37969 zcmbUHWmH^2vo;I^K?VzhTkzl(Ah;9U-CcvbyAKY*J-EBOOMu`m!GpW|JLEpkxzAeP zzi-x>wRhL9s_v@p-Mjm`I!sg6r0RjR7SwdV`5ds2A7y<$k0ssq^DE63lKtLdg zn}7QzFY)agvAlz=skxO21cZ24q8gmC(l5L$Ek%j%{s3|Kjxfc1Dfk=_U@XldK!P}@ zKNhHb%rHvR?1Bo3-%69auL+?+d$_C)-Dc@dm77~00fSFttEjJY*M2wtG?B`^{<4>S z#^`$5!2+?be3d_~t&Hu*BjHcSM;gRR%^D?g2!%5VC&{#Pv8(8!tzCa#z;JVL>g_2GgLV)!eeToR0*3;Jvw!h=Sv*b`QNC7QoFl;20(ErCuDr5^{{ zmV&!GO9IX^WN2iSTkY#$D%04nZ4+c8X9gOQ(D2=9?LWtFb)Vj}E*&tx@g3{ug&D_P zjAR(B+3b#i6bM~PN8W;hK_BA&gz&vY}#*mKV^}_LuUXm8{x2u^5&H*yezBd&l{F7 zw7jRSsSD97NC zOaDYj;s=FTxsoOjtt$0NG7Z0|W~iih6;SX8!BENFAi$J?P#pq0KaD6R4y&KMbi1FW z)_r?>_F|(>;OmB;d^Z%rB6BxVEONhfM^wkTEHBzeHv!mMKVb9suisb{mC4ZDWFSN~ zP(VQfd_0W8tTfQU@^9|yWu2{l(sSQ)EJQ!LA=wj(!^Y?{65gW2IBYfx4Uj&By7EP` zVzcc=`W`OIqgPB(g}AA6eM{>9Vo1gG14q`WU*=q3l_(};L4R@8o@mvTYPsteJ_ep3 zC^YCONVfOnU~8b(_R_!~{wKNzU|}cY!g;)z!1$*B+d0HAN{c;(4V3nI%_KTLNfCz!vWkHJI;z*Bvo?ctJ#XA88}$6iGI=qGB2 zkpa~eAoY_#8_sp(@Bnr~kjM=Y76?s3;uPeMLxu&S-->J|;}a)F3lS!}5U2SOZA^kD z>^~;b`E4Y2PMo|JvqZ#4G@7_57Ev6|0pH#4nz$-9hAL!Q{=+O%!?&TId{p>RLg-T^ z-?7qSuyO|W9qOSf1n+(#?^9o+c@UfkXXm}(bk~2OFb!+b=fi=E7?`e>mJaznBw6E7 
z4qG;qw&~az?rhkx2$un3vBNrrQnUM<6?Aps<;B;DRMqdgiF}XyO!EqH*4G8xhgJ$; z>51tkmnJnvHARyI>;@48^##?PTOU%ilcR-6=kiYBcSe=;TWx*XV%`$n;@grigsX{I zlp~Fclo-#4P^1eJqajX=WsALym5r^8ZIw9trD=><%gvdQA(|A69Y6Z3?bpyRwPCs8 z4;VV(^aC+yQmFE#N~-dWN@>)|)N2WlrG#@?cmgu{rb^ffw8b~Gb+c`=3!0jms+yge z=negvhz&ILx0aa;9zSpV3*{Vh?Y}F1M_6`PmR*)uwqCa6%Cr$fDJ+^o+#fw?xSqLw zy1rXRI+VjzrJI+Tccr#y1f|Ye1umJ_={gp=SGzYpXkKmq<;6mZ z5J0RM(2PLGnotj+W1~%0>EY@-tozy!S+A^a(_mHisk*jFvR2coVd5~0b#8fxYsRC( zIl?(RD)={adJCH~Ti1%BmA92hSGXP*{(ZGVKj|+21(*c3wO67?N4HZeKBMGHp^jl|A*kqq(yr zbc(TyfgzM3WE|p(!Hiz}dYeSq=NPbul0~1D-O=7*tb3!&`-kn%!JoN5?A}OTTV6>I z8F%FOA@>aDyoXOc6Qfn#XMXG1uMDr8uWc{c(RDp?6Nc&lZu^c#d2!>)SQ?$W-DcPgLyQM%@we+dijKX(|gTo6PooFZ!{?`7%zDxSEY-GAR>xF&0*7NGUBYb4g0elLd# zeQoEII!j5ia5cq(-+6u(aYnjuSC*Si$BfJNvBycsL-O(TTCQDP7rBZ*FkK;YYH06# z;VK(30LBuAw-;j5Vl#fsZ)`4At`4ep-uMO-AiSHa%Os{v?UPE7+QIpxBWpE2p3$?K zum=d)ApTV-qRs zwcQ?q7XhcisEDrgxeOy3*DHn#17+A<4-C6)e+}!lf(LZg#k};?a&<^)TOI=6P|}k( zr23_nlRW6ws;{L=eqI}7oHInLp&S-eWO^=HUyE6QhzUCu;~dP_dldNprL69@e3 z96;xvyFVv-9=|n)mZGqHOlBiSuET%C6N$={DUl%%8zeF}zqRFi9^Kb-A#C8$bRIij z4BwvJ9vFg?!^qs{r{-Pqq&ZSn8@*2AE~3orbTQpZZLCu;n|Q-}JvwkacBCAK%i#Cu zIqQv!;KRmebl0)?eq+V^oW5rpy3)Rk$Ka`FV{vC~dy2DQ|13F~sHFC->eVC4^JMxa z<%od&hs|m$=dS4s%FZr76T{Y zDp1e~{~T<&^7?-Ey)J3HXL_-JK#!nokXy?pJb^zm_hoy~xN|(X^8fRL|N6DqXY+h( zGP}pO@mA*W$cNgK^WI{~a(jV$J<@w-nrOlI&HB%ZziYu+)cV=u#?|KPP(q9xtf>#F zcj^7{eanN?Y;s$mdtjhqR%FpKf_`;QYAQtgGCTw?<|)B4?`Gf;)y~U=vQ>r zY#5a-nC@|Xh#y!`K;%2Bbp5*E6j;w)RbD2XpM->cGGv$1D6Xv7$XRZHGdMWCbVvaU z*o`_XJ3IaDjg7sNFN-S>8DnbMjEO=)W`Xq4lC$%YFzX{-kXKbc3}iu`W8kd?yq&m) zBLoB%`THMILhuB`5)E+frlWNXb}U~Fq>!r*3Y2iAsw;Bn&u-&&hE84$Z! zTiG~rx$%$HCZ?OHo+# zf6T!pUefPQPIg?3jIOS(46ZB;whm^DOq`sYj9-`;nVIRq8uX6tHckd^^fr!U|8esF z{0N&k8abHTIhotq5Wo91Ftl}c;w2@0ujqe&|7oX*oB4lhvT^*Mu)qN_zCU4PV)(-N zzrMkyJnvF2d2=@tD|KOWYZDtsa2Hq&c`EQN?$5P|Jmdvbd|JU;WJoz6> z9>(_o{!c*v$<{w9I9+@QJdFRFdOifnyQemAZ+tQrmQw~_q29X&{PP3buG?*)7YK#eBR$CFX3ma7LufQB2hn7 zeP}d9NDARFXa1`DsV^k0TU}5{5MN%$Ohp9j-FKP&IWlc&Dr z7GXd5IH~Stj(_Tre&8aiFDe=W*d4qxxBC}xrQHBwWN=X-FpWDL7D^hF>(6}muRQ`= zB;7$&7pDi9yrdMuYS$-wJuL6uH*mq~SvlL`H6RG|T=jO^a`b=RYhu8~#V)b(97O08 zU<*V<1oOLkHvk9R8s?uH@xlUr@Cu}>B7`6Q=}(D)i`Pthgfl>}LvsKF`|tNO2tbj6 ztM#`%P2~6ru69c&CY$MrLHnm8+()zmvJr50(>)b+pg`V$15S3oyi1K;Y8@!c#zVTRM zdhH*(4iiF3aEyMAV13S^aH&B^aQ?P7D0F&gJjjK6h(}Yn*%PyQdDT(Jji7NL;lO!t zfI!0zBYO8g(GhMYqcomflu&0z{jJezdU@N4ZWEX*t)^$aB$IT6Fxf#SrvCRJ3|lUn zVtpVYfl2HmZ|Vdtr0B-Gy~sn^##NY`B&s~F)DO}GNg_*0cW4lNoY+Qlnh~@>i0PV| z>W_%yCanA?%1CGJ1zHjW_K^L>t%3UE#1O>^yA=l>b%tR;KQ6e*)Au0-`S<%ndJB`0 z?%dyhKL$E&?Qo7{(jq2OxzIO!e}-7|kRsGV7Rc&+wt*)<;Wp3`HNog2BbuvF(bj?G zJ(ZO#w>*V>Q8($g+!L`Mp3?VOKk)YsZuyc}g!inw}B%T9cBKZPlZ zjdFk#B_0HVs7RLModQIob-tO6>mqTE@dRMZR%!jHH5F&bV!n)}+3Jv;jfXk% zM|yX3btl=h69UqsY7Ikj*;elRG5|`k%tOIZ#Xahgk=6e82%Td*O@6(WfY<)=x?=XR zt_!>`$Kch-Qls)jym=^>6ll$=wmTq9TJ!;pe zGp%>nmBiz6qFQrOI)CFXl9cA-2>zqB{v)gT*8WhvK5)jOb~RG*PSP+wPo1gNbv8!u zXxyh7N=IC_kv{O?+Jqvy=!U4NAYU)@rK@c4GRZUcl#oL}XPHp0TuUq>+g*1SRtH9#IRF; zZ$j<5DTI(%!p8CQEZufY#3+;xKem!Gp@J`HJX4B16sEExJgZ74!={Cr{kLAb7I3IR zbV^};KBpJv1HFinnN55U0*q}tLy>|mzkdUq!%Btw!JeSpk%rG6@t4fUu|kFI1>6H- zrv{>~K$~VhN*&7U(Rl8%I5XEv!|j@Sm%bjq;{zkT!VFGkxO_2ge|Uv@RJo_u@o@D_ z9(Tnze$hxA!3K*tgtnRAMUC<|19FPtjKP{%7J^mltHUbkb))=3*wF-eWwboP~frLYw*)vPDkUzomY{go~X+CP4M`C6r@ zqCqfox5?&6=A1m2H!zeeKOV_xE}B|$xnIbaYy!On=?_B7O|-mKI`c<7*eP*@Rzo$O znO8=)`#Xlak0nS-d@xg#hd&^7pXWLDN5 zwav}P-R7?7+9=R?{^d?v&YMfMpj?9)PsA6eBdzr=o3g4*l}c+ z_;|J-q{hB4ii{MeB@-XH3zRjAg2Y{~Fwg!3SH8Hi zwiVLP>=Ee*Fjk5;2=MFt#B%P)t25!0HHO~81*eRZmbjGvjqHTm?gydgz3SooEmIk3 z^t8(fr!4o~=a~xjy}~2J9Xn-8v|uM83@{3Lb7MZCQEzF>eDj>|;AaW+A45^bC)H7_ 
zevN#4T@ybv)<`l`*yrkH&fB{N7DW3uk&1p%BJJqxCAx^NHj+%r=p&6z~WaryDddz zk=3>zOW>-G<+6mwrz`!nZLOF6TpxlchQ_Ln9Rw^Ee9O?c10l z*M$zowlez@^x30vLKmHtLPp;TR05sqSR3=%SOiu}3$5`C`xXG|wY&{qEPX6PlcIr- z$g(G!#E)SZl<^!M+z%DYHY+=qkpgriK&7DKx1SZOmX64;y2Cjq>xS=K~&0bBL%P#za#A#Lum+O*DF_~@$QCT4`l1el#9Q*^*bEi z{U+7uI`BQsQ`4iuoJ>j%vIC$3PHlaylWZD&)o4C39sN*4oI+%57f>pc&M`(kieoX8 zP2+KAmPz^I|9E{sHiN+@$9sd}{ra#c7>2BS+2L`6P#nQF>5|T7z2Eir!e}yLOu+A| zJ7#$Jr-K_ApDnV{X05c9EgFUIp7`c;E&a6f$%afOGe18c=+v*C@N+^W0&9xCX;p|! zEIJ{3-RDWhWG2c`3uHc1IQ5%--70^xpHlp3g8xnWWVxYkHcuf>bhgv`=_j=HW^eFp zi4x`G%ia3R@}Ni@smSN0{s$u8XTcbrOMomgq(7I(b@68z6yEDtY1@@X`nh6-KB~ZY zC&Q4>gRxrWL)*fHI`Y9TSxeLr(bT-*k~>tJ`DuE%>VZ_Q)J!Wst~jTxMsd?pyij!)xxhK*XPBk7b*tFbz5y6{se>+F-1Yi5tE zPutzg%GYd1&Z^G{AiCVu`I>{|zT@{=Nk3os(L z?R+TwY(R5197pEx$@lFA3}*2JSVa-?rePRNAzH6AMvWv;Zx>1Z`axb`BFPwf_4@KO zV_DT<`Z@16ww~8i^G9bWUVZbCl#3r)%cbL&JDAF5=B2Dy7hj}C>Fa6!(joi1GbEQy z!Yc$9Ac~Al+B%so*w8HQM174SVTRoiG*h|T8HiW5a49u`uM}-Wqc*-zp#t<{kZ>h= zC8<{M3eYdV?wX;NzG+L8DsQTC`SJyK;s%u zBXcv`Vx$ruolCJC2Gca|x=gJKl1_?7jG+e!G9~&BjOe zGRXAUi(+POUM}^i{#q3zj{3FZWQeFOiqBd{LGIG&G!f{J^gEL~S=Z;`N5RL_3I?54 zAOLu&)o3MQxljn(_jcZgqTJ?U%}=n2MWfSNFIX-RDPXl>5!hgIb2vBK>TKEPakXd^ zGqqS_^Z|sqY_lleY#2ma=JWEXoFfEN(#M3_j~Jl>p1@4HqVU-Zvv@rp5BwqsxLt-( zw4JFw^r&xC26XpcAJhdo;d}Lq^h*#Bu;6mq7cd!3Di}*; zk=pY25i}(Jj*nIR{P1FQs9zFLfXwXk>}C!bpn{}Yp6&A(_QeqiixVJDJXmggI9qbv zk1kg{8Y`xro+pNWz9!(sc`hu z=nqM`(l5jLOngLzNmv3?Rjq3d5@Lesa(yKW&2yPh))pKx;M03wD%$aCdU6nO`>5*r z{As~UY)T`rbSD>PJ$kc>6)eC$Wjb|k-bw`K==C2FH^KxY zKO5Gh5JXFEyt?0ShVGg~VA1c-$nmRaT;89yolW~ee%Ns@Q?1Ci9INtc_Je_AATn{s z<*@rn1h2HaNu$V8ri>R&tV*m%Y*ev*yO7fZ9CTnOn3lWl-4s-~TY3gUud zk^xDsy3-@SJeNSo<8?gY)9Yq^-j2k2eXtPl`HW5~1F9A$OQQ7CKly3jm+efz|DhHz z4S1N4x|5GrJNfXr#%9uY{VHbt`|aZkG-;#9n&9Eh9F&m^Y4gcOrz4Fnd|b5uA!0s| zE0{KpRCuk`Ij`xD$EX|uv}m75aPl)a-Sic-9+3U}-3WnhNjFCeaOytF8v6iRb9|(EIA(b+4 z)iq~W#JH&O)<@PB-kmaw;^*x2AF|fx^qB>v5Y_E;Hq|2ykLz$HCQu`y&Xc#^{HFN3 z<+IQhf}CtEx+3)TZlA})UF1Uv;yhWJ)4DK!$9(`Q$PC&r4G3N|revhq!rrHEj^-aB z9Y_^wGuNe$ifnQH2-?Rsz9{(~i?B)o*$9*zI zqp7R0Jz_+cTy#=s1+~(hu@3EaH+wSJkpqsOedC$j)Owv>AzUwyH}N*2WSiOOu`G0z z04R9GDV7PIZwuOvQ7tL<1k^%K`_Bd*`ZkRO_~ms3$KwMt3PEg_?Tc=uRD*$jmkFxNg&onI&Q7{ zrs~~SP#3V8>;Q4wJ*{-b;=)Wd0o7VlI;JU@CNA_j9k9rF!}r)MBmsz{)SHtx0f;u! 
z`4XaN+UCUYl0Y%#1`8G3><@RShAFdF90~T4pufM<#R?>YvvdHoICKiS(Dw(Gw9R*X z4?F0%midOk$IA_ACWRxS;lP9WsOk@;i4G*ui_)01DtpvV5ISl{)aG;kD`Vk6l9z6x zFjjOhpD^=0IPFfG+&G zCnPsC(^E{Njn+v&u1^&A$#15Yy|PRKgTfKRs;xI*}AT`6Mx&p+^D^qY3!MyPut z8=p$OXcC+OG{y_JC6Z7@`U2m0e} z+E}Dx8LfwR?LpvAZxs@RL+0(HG!ni^nE6Pp4?bT z$~wZF%5~2(UJ0L8mlK+K^s6STpA27U^*Y-(@w6%DhZTBeJXWeBsYlUGUpiiHS3DZG zudIuj6n2B(l6LD%hDB*d8J)G+6=AkBfdo9hppkKU#!XPt3X|fZBX^_#)qB(eRG);d zXHr;a3-l}?ScfMUc!};1KMf*GgQ$BCSCF&1xPvHC41}-6e;YUq9C$4l7x9aA0&71a zw6FUh$fVZDB)x2wX(}JzN?qWIMQ4#%z4;G(SsP!zk_7Rc^5_hrNCdw=1mfDH9AH&y zGYCOh_6M z>xPU6`F*Psx|{8{jgcmgs=6x8BoJ}X(%7seGr3){gtE7Z`9^v z>Qa~tAk5y0Q<)#`8X^$fE=^5nRs#y>$iB}NBf;q*FJskJxI2f^?gN^_B@<0{E4z>2 zZ51(4K`3Yds_BGBG&DvV;kw`Ej8Zl$k2~UTo@vx+vQY>e1b)~b-F=HL=igrn2tBhq z=Dh;6DdAYM@FH-P2hN5mlOnJagY-|b{QHvd7-y?m)rwYxzu0M>g#Aqfl6-G|P2bKR zzyDAs|I0AB(2fjw#X~x+5vHjAzAD5=eOP!V`@t~H+lXn6@KSQE4Zdz51919l#&C^H z{8U@xyTHPsa``Z;14@09)|1L99L-y(JoTg~u?e$mOGfID~+5jK$s$m0y~DzU&fMLM2iu^rjF5YpB}!Q$wP5 zhtC2CL-a{R)OF8=)M(|68gT=P6E`%YAXQ15$Ew}POe?%^=G1-&m(+6fqQSVvPow8s z{7z$a0wW^5$6p&N(Zr+qV2GUUD*!l^iQS4YxrNu=9s=s35W5Bk{&Rr3)eYHn-lyr$ z{@+4BqBdeH28`4RL;997DwKD42!Y$;}t zXpI>f>=N+1otC!v{g&P=IOI@#yb&+K|ETA}(JZ@6CA>P_)iN}?ieZdart zcrjIplsLCPO%#mAl!Oh*sgFB!wkvIik=`j#q96g}f z*}tF=3_LifVw<{hqS9r6V9+X{dsYCS;129a0a`f6j>A-3mN&tfI z^_192ZPYb|OqzTQA#bGR)i0$8%bB!zOOwkt8%q&n8?OQF&oWiuz2|0X-Toq)<=xs+ zzIVOtGjz$!jod<^^n$i^iQf1Raw@%o$$NEGmpBj(UB98A#b4iW4~(P3TsCYIXhZyW zNyLY5w8cpeyhbN(2 zcaos;#-i#Y@Y-mM3z2-Jc56_lu<7Bb|7JEvOrQARL+;ynfYaacR1rqPOYmA|xq0_X z!|SmvXB~3pbJ>owgDqFnix%~MBGBMMpxMW%=WwW6(yzI@)<_o!Jg|G%XP}!Mx^^tL zh&p&s4bU)FAHWmCbj?rhE1j~_C`l|(AuOk}+n(<8uu#zL)}gJLS!FvD{X;aSCd0&Z&a50p zWu3JQBU6TxRt3hTJ2n_q90QtE5`+eW;r$7`K>j0|KfWo;<8X|$W~cM9C8sRe1+@tb zp9;2!Alxzqa@TIi{4os!k55V4enykw*l35A&xWO)bKs#@YeBpEwK?mHBSKq$;Jy9{ z!B~IqEq2Zh7~%@5VBZiG;%(l_m_?kcO!kt_9^x~Ygqyh(>^_HQOAdy52HVOk44BR*WK1bbeY=HA_xUyW5Z(CWJlw!|^g%4e?bw*6*JCbe_ zd)`mhxX(0oa_m!knJ#BA3Ao>m-`E~t|g!VfHaMVuyP`6ac)tP+N5p5mf4`zD<w>?wLc#T!-BTIq2B3)839m#B^gg6(aNmrGo=F{-Ad{4LSk5=TLwq`&P~E z`Gi84w5K+m9o=CzQ-#hxOmZZ+^bXJdHhpaANv0RJnQV0!Jxovd$iNAr*`fE{4cOG* z;cdLlh2-w~LEc6CK%Y3FH7-cc&>Tis(8TOqcl4! 
zA@r~u?u(%?ma4L~2QM?pW%z2M+F9@}<5aIvM6qUP>1>zVIxPwWHkpr;Am*~WDP@O` zBbkLpad1SMb^2q74(!pNU5Bc?k2eKXyB$LuVZXgY?|GYnpQs zWn}2N^+IeH%fU~3d-;0Bc8x;U0$I#q?A?Iq@5a+qt$d?5RuU1!g{#WP@9DTbg}20b zXB~SXV#@x}g?2Dt`fneh^C67G3CwxAskux4BN6j?--#+hqf?UbcXE?-aTW6)iwGYC z9w5-IsXQ3Wx9bjV&TvLU{~=TDOx_)hbiTo}do%*cSLt^l|3i*qzO!dArl+4Z-yLC1 zucG{O6c!A2)Zdz|_WV5ssB@tQ3H#2x;y}uQwJxFY)xuT4b_0i*1|2w%U{V%j{a(C% zF*wl%51nCheeH~Y>Y@VRBB%=h|L!iRHIoPDAA_440xlK{{w5Il;EzeGDlaLCfJv<= zY}KDmf~Vs^^e@COQ1E{5!8C{Q1W5s`)>;#z3AjaD9QO*JAFf!aTcMG&|3%OZfEENV z=7Dw`w68!8Fp_M&TrZ&+wk78|pYoqJY&d|EyYE0U@luEjMJ)1kdy==aV@x4x9>UB% z_K##We9wN(DP>H^e=Oc(|ek*kSOnogTPb!ugJe&2;6SNH911$e<}axzzxg_$H7fDBm7tN84KLk zXh%Mm-~ZQ{X!i%OjK^|Jf|KLqwJRS0wQNZ|Xjx-h(mc5}UQw zk5X`0p5TR2iCi|HKW+wE^7}x%wZ3`)O&3JCRP{Vv6#z1);TZc`{$=Ph4%GHwR2~?S z^&nKq6OH=5(e0mg`!Kh-}JWhsqGG77 zem5;y5)aS#=X4xAzw+Liy#D&7TK{>sLWAF!6N4kWK0aCoU#@6@u7KqzX(l)?lu9QA zDWPp-#>qQkJbHOU=wBQ9p`i9AvWx1>CKTt(HK>gSBP*4s;ite@;DD`)8NaGlgM|py zSt2l#&pVAlw_TjmVaKk35nEOZdO8UB0H*KBj5_n;NB3|USEU!$q?gWUWH?9oL|bV1 z;u`ZFi*EYzV5f4jyK;`woPTEQ(s-U;-PglAS^rWlqMP?R&u*#lHx+%xY*f}YQp;TSV>2J05$VanVusgN_7olGh-z*+ zx0ki-Z5v;U6$AOiV>}?K()aTUfveJKsa%;9-yjYYpM6yZ-+o?!2l&k_kUJZv!40$T zm&BjyU6SFC>mHQxjiW+jcKXe_An)M7tWqrRZ#tH;S*no<+~hvEe%ski)+^lX3-u%w zM#N^wXEqrT=Ur*Cl>@(-_6021vfR+5vY_#^6%lSeD}g;1>xMZlvxicrzEEB&@A>0K z6VkYvhq)0&6ks(z|KU-IL~NV$+!>%a{UId4X~k9P!}Lv%vm6HHdP^NM^>)L;)Eui` z&KNspZoHJ~RGfWZRBmurs;~ya9~DEiqqCmQsHD1fn~61R)3 z*?My&kL@V-Pptl(MRM7)@sx5Dk!3C?%d=oIL(KU`=$=nDgCp+mM7A6v7G12-hmz)l z!u0RpL9AS@FWBKWmZb-r*pp6WmUxj&W{`=%W^^ey+9ehyw!VKAq$N<<8k`J#Jm}{`S^-TkFlqoXoZlan}ob zA|qUVT@~57^sV^Vc5kRk%Y<7}iqmyoZtuglr~7lx1`mnhM}$!8vpGbZqnxS0C>K}n z<=NVbnIpAfX71NJqEWBUHg1*WbdLJ!{431E9FKe7^SYc_r5?tu)^Y0Fbts&;zLZz> z>u*cN;8pTq3$B$65>8$fy5~Q#9n1;RUel_IIqCH&L@}~$#(GqAm1WCL}&EM zi|(y590<=Th2236*z-JX*@IeEw64^sqxdOHM@dBowmjARU1Kza#}V|%a1hmXJuOw6 zWtV#JD|-CGTS49RX#TeXfePTQ4d)EZ3;pGGJVKPmji1Yu`8W^#LWy4|OnX;XDEAixMO5 zPmC`mtWrpH6Q_S{7J^W${h3-pG0%)%lk`pMOJx&fv}=n z)wQ0eHp}(Ds6t(`yl*wuL8xS2=oHei#?lSmcWbUs;~AVrWuctk)PtUvorOSuJdWtQ zxJqrCf&h$0nRJMViE`>_-n|ZcE3M9DDC)~n0s^}$Rm`*L@5^l38~ zxCs~0>f7T3T#PCJ?;mS@>R)3{uEqPWL8!Sq?n+U~+w)CXjROc+$qg~<<_WyM#1cuo z4Hh-FI#Fn%T*HYBp`yslfFO?=qSX6%UE!<<2!r(lnKJKcV=K46K1+nxJq-_z4CZPA z-i`q_Bo0!bG3(SS%#Xyhy2f%%Ne*5;0L*1x4nseG+Ho@SO&}?dLw#AFI)a?J0DwfO z?)`L|NYoC|0icsP?4i^a)lIY0F}rA^g;@4zzJg^LSriakbgtKEmAd|g9}K&a$u-M7U(Ujurk1X^x!NXL zeR4KrwV0(M<6VTOi!GJe>`mB{1)!c{>(2L*f0jU`^Zrh`c>mi2aQ6dcN9HEx%#0Ld z2RF(De;0qWxgR9Hx%GD+oJn7Wo6X&&%K>aXoSR_$#ha9t0A46mGU1g!-aT#yS=xBd z6g?ezs}fILCnOErY|ia*6!0;`GOfplle3IOCN8e#IoKDQXpF0}~BV@?PGsPI+C;jG*+*<7gSa*de=qS`w%@6(hBwnRKH* z=_&xKHpwQSoa`_s6wakyIQ4R>X$dN?E3!iOJZ{z=yrvku7wjjs{-b6Ll|)4AUm(57 zxMc+u4uL-_l3Hsr3cEuF3H`DE!_F^Bfm&svx2NoCP$*D6clJ&SdiqhHRoI&{Xn3_A zTj=vv5|TH1FcM_)Ube4pv{b)hssVyK@^pl!;f+EP&MH}%SAGw{vB<2eT)$mQC4*r{ z;0*zx*rFmi6qYmhE2eRA1*d%iB3YI^UGt z+wtL|HX|uL@(6OlGK!1v5{nl5eS<-#IJ%J2MSxt()la?4=dRytA?>gqEv(F3ZR;2r z342RFi63R@Rv?oGJE<2%OFvI_`ow@r_Kx4V_-gqV~mWwfru=wWg zfJ%PbY(_Jq&+~=Y5|M5QXgiF81PW65j!=w6qp|mk?({pVYmbQzAT7Q8DHv$n)=fJd z;6~|n2bB9OqK~Y!uG5I-E;!jLYYd2U>nTgtqPmMx{zi*p_)}_n?}QFMPFFMzo`pmR zS6u*D7Bn&Ff%2V37UPGxmlbl6|6Q8re{P0&l74unwxGew=Z0fdmdI$+A^xSt32ixv1%^| z^GrZ80G+U%bMO->EbcTt0dbb9!~MCz2j8}PdT5n0&w8Vwk7cF-g(SD4b3>ynDI01X zq@O_t0mAXXFkF+6N>E?GCw`w*I4MGqQ#)cOc3e{ZXW>cKPnm)Nv#t|TU)=L+Bol6@ z4Tzx@tD%*;*gLcp8slD0;fnQRq?FoD@O0mlB_uh$!>3Kgg$8`Ij4)Ofx;nInCo^m? 
zfRyQaCBl2RRz=#0S-JRHH`8^`<0>r~D5{0&d=Ddx69>U(&zzcf_nEcL@y;GX4G|)E zOrn7KkFD1G>j_K~tU09*yLvo632d`1YK6O=7c#AlJ#9V@k1+Gd%NMUw9%#F76L1L~ z{oYOHv*c^nR(f$@)`c>KavWJfj z1Vldf1}z(D#9u#`BW2Cvu$V?>e2f~|G9WKv_p}%`AvXxiQ7n-BrAaD~APlqWY?4i-^t-KzE_T?g;Kej_mEp zikL_^M%>XlAOgjDy)*OjZ-ZXr)^-ekp&b8u^>GDGgebK-iLpm1SYAGjX-9kO3_ie?t z>lYoRB@C<8;^Y~($((8aboFZYoSGhem+yDSl}2m(I%T2!FXHhoq&>$BS<-CfKrLNY zwhr}{#xIz9$#(R@9$F)42F<(I(xH2C^*{o`w?KdNanL}LIuV$KTQ2{6hS5bhaQ0JlSS=}9s zFl{rH8=M0dpf~66p?a}IKzokRG@lc*J`c`nMPpmp(}O3`4Yh2iSQ9kHvNGFzLq?}Q zc_RsgK0Y#v>36T!Q48?HHL8B{?Ls>>`8E^=jO*O}YSuswR!$=JVzxy-+6C{+(awJ)Z9`_-cxCs=$YBLxd-yiXAcp-q2%pdN=C)O?7~ zGNIlpMr;@aNFp(5;PBFK+}jzAeCJ^-sR06d7IMx2VCuR8oyIZkFk zGEL8=s*1;sU?3|9SA_+-Fff#Zu{crz|#E*wc& zsCG3O;SNY@N+gE1+sTmDe4#S)0*Gv4P*ZyLSti>C1dO=*vlOlnOwFq1uZ#fd7;@5ToV1+Qu_rUKD!tO=jgD9E&^%G4+L!r~9prx><&clS( zPkuAeGu7Ik>4aM`$z~CikOvM6DhTUchIl=nu@_fsO(HJr2zlJ9Etb!Ub@wNkDukPW z{_QCuzF`~KV76r&f=!){DTVkaJ{CnRIwO zOz1Li#iQIynC6h8l&WFDN)~A99jr^tMoK=>G6AKm)8lBKX@G( zQ6z_lNV_%Q>#=Pig&ZwIVAej_)P7+sklJSyiu>@(@eN^ks8wV{2rV2~l32I<$c@E9 zT^(>eCDC<1SyYVfgsZhZYMbDiv zGJY$A#5;BeV~Hxru7B7MQ*?Kb2s}Am?VS``8zt+CnN9Ds+S^1U^*-b#RFy4q?7PV& zw2_S_{z@sEaoz`yMWmvpm!QCyKL{5gs0pw7tm3gauuwb~ycP?gqlYSPb?tw`xjF(q zLKyW(%+&*ZC+`~XKg{7=)5BU&4R8(`7%6Ec7aqZbAsCBL&XtR`ELd+l>pW79YNk&H z1Odt-$~;4el<79ffJ{QrD(8wyj2!L8HJ!hMjl}Yk_HIvZdj|3@?Z0+qC;(|y%b3+$ z@C17Rts^&G7AIQ@tb2CpD$qs*gJj%Jd3uH){ z|2Pdr3p;jcsHEyx2f1K)8-4LJBw}1L0rU4!f`PYIv}`7qhOi2cE7t^N)yv|7_8 zgdTg({$d&=VWE{UJSrF8`ODrQqH4$Sm{@Fq(Ts21 zoRVuv>XD|76k?2srFx*)y8L`8!?Zpw6I()QIff)hTXeYlWXi~{o}DK|xrDq@ru@-|Lanrvv`v3{GfE!uv#Tc5R64F* zsMLo>wzRZD{@vC2{#|a9A9tQ}UL3w@ut0X-j5od|mg&Joq+k4F+vIUl zVsK*66_PO7PnkP()Q`u!w>>;ZlfAK{i!?8MQSrk4^YgcYuHr+;%%r>zC1M=$<-ykWhNNOUUmG@}8quLnG;aGNjPpM8?5f0m zZ$tqmy7ej^nl)5{GXv!qADch0sq(j5;DFQ>!U2|%uShhjHA;ovW^mQ{baI-#<{knH z)F}@Et-!9v?+|#oW_A%C8tqn>L;UO?3UrLXRLJ-r11tWo1^a!gu-DQxq#yeQ43mlX z*9+9XeeG1SB>ma@^;t!3-%Q5Q3@qrq4t4P2r*UeUX**e`!XZw~UFLwz{^tjJhSyBbPA zc?5K~cMqgKNb;=x@e%=0vRqBUQs%+K7yu6m9vB4Y(2rl)K=&mommT&u0;r^#6Z3qx z$=|hceE6~Q?&aLPYy$lUmxk~VA#?A2DSIDQS! zq1k6Y$8F)iKYr;zO=zuZLp8GvXn(JHtm1y*aJf63_lfiZo9z8nNmy!?TRd=chf>)? 
zG}Si;r^gw4WekdZX;vmsBCqdok?t#(W!{UK%o}zT4PcOka~^^Xs+Nm*7ZXXW!SKuT zP0?2hjPH1Eb_?GKHi*7hY2h?*pZ?6&LyZT^c7VK?=5a<#G>48XLH<+0Ksms3A{uXf zfW!)2w4zNWP4_=pX!dG@r}y5A;J|sCk%oSWpA~46c=w_9^kMdY zGo$BiTSIMD$-1zR7~+1kQN;P3x4B3&8biZA^RssSxjIN#a*{`NnJ-s$Uex_4>Ahss z*1Gb`2+#t@j7SVBIVIp+*F7LJt4Ck%Q-jrmTm@T4LX;^*WeS%OpKu>A+IVK%Y)R+7Dj>r=}1IOsk#^1#Q&?< zw=A}(vuSrnHJ`jl(bUzuzyh`TLas*saYcoEMR+jKMKcyj8L`J%EGU1E-6WRNQBW~Yi`Gw@6Vm0qk)lJBAW=eXWFYIqf^RvK{AX(wEBH``8S+aha@`Uey=X&>h+)Y0Em{#~xeh~8rk(Q= zTE6k(ZL@dl)*r5TRE@UVOSghO9hEta&^^L`e|{+ZAeHu5o~hzICnlG|mZ0w;pjV0z z#VYRkV?{MeTQe+(UrdX8i7zj9-B^G^srbz+<3Qb7E%tff)6WhuuDAm@dCRO1XbPn0 zL3Y~A%deIQZ?Zj)<5qo6dl8=Vx|9zlIsVPu$V)L6D=aYlcr2e6F+BL-_dOF)S(5DaYn?tjUyHMc`tI#F zp@+5WnG8^W;OC9;xZGXmcI=0e`cS>6zQ(D^@qJ9|N1?>{4jT8|exBq>p z3BY*wF4){k_>WsCh(n;wd=aJ?ktAj{SnGr7+Us}bF?j#z(K|0BMJhjv8{x#Wd8CfL zHXF0hX%|GdPLdJ!-5yC=Rog!QN2j?VV>KCqmTNWAuiAfk>9gmBF7%|X4#sLfX_@~K zyZm{cCNDF8im2uMkJ8uo^x6%cBl15V^N-vG-FecbE(nJ9|C#&zM-UUa0k6Fv5x4XI zpFiCo1cIQ0&ofQ$f5o(qh@pZHTLGV*Jx9p}hxenJ-7L+>qaAlRnTf!3JbhkXwF>ju z(`Wl<^w9sStVtmF3wr*ZisJr3%l{?(vit92`^t{O)#GkK%t)}td}9X10^!|W$=v6t zmATEjT7MvvF14Ul0=4)_rvrYSDQ@Y3@JkH~3~eouJ~D|(p*zMXLSA;cf;BtxDDZ9> zijkv#f;Dn`cP%4<*r!Q=Q&F*ZN#wK+ypqf$_*cC+0jxH5N9#(M-|f`qOBa6KY{&wmLbs5#voQ7YYOrJ|RJ__Yo6Kf9$oDzh7%=r12*)-} zU-F%z4Z*DakfE#w#qaI_4g08oXz2aSXJ#Bm4TFY2560R+80}D_(9Ameun51KlTna{h5~<{q8;*r?y?0@n`@vyY9Iv!Ge&D&M zCjoPZVCiGX?U3t<-qiPf;C!-QHrae5=U(aj`*>Fd0O94>>&a51CTYkZDj3P3R?L2b zSSU$wATEW@`N>SK$)n?0y_n%9${?9=e zhw@i5@+`sh)EAu_W!$I?0{~x3gPF5J--iQz4zC;gkrb93J<4B`EIKU)hin>c&X(@_ zEA&w(+Yu~XwB0%27&sUue!Wan|EwiB6avF9==LW;Q7Es;_B&r4biMt4h$3dlwc}8Y z+6yQV=}EykH+a@`_VExBBHbIh_kNW%hp1==F&;6N-_{y6C~!R zJ3Jc-t$jHY8G}#)(oU?t*iv<{4zQSR`rmye_;3>xEQ!H>1tYD+u?41V;!(sh3jbC_ub;zG_#M0bn#gyqshoX+)ZEG%tXn|U zjSDIC`-7L7{83_=4d0$u%;t(d%c<$QJ$2;wx~Z}CPE_VgWYS(Rzf!*K3pf9c%HG#I zyK)L{)R&zqe?b9>bvq~+76f>>2JSQacWz~ypi|OCC%-pPb_skSts#&H^H-)177rxf z_u@kLfXshZb)ISj2YXe4jwZam5gM*ATm1GlanlpqpQahoOrlqf?0ANNisx?Waz4R- zg7?rX=6b#*JE)t*>-2T8&QzXcgJ-PY5)21OwY#~03-j0$_>iE&=8Dd3W>7}7N_a8Kmd!|(9tq}wAw`g1DD}l4la%3K^A}UMd!&L;~ zwj^V$uJZRw1LH;gI)2x~8e58ZH-$9zfgV%|-PM1-c?X{O_3^6hnfO=WA&IIjXWO)R zL`_4Avr>uEo@2wb;SNGPDmx)|c4QzmE*JWT$%D0W$Z(j64emMI6BH-zs*#cqm+&i_ zHqYX7fotn$(cG4z2X_9a36Fj_mru;0+hK(UgyDsLHtyKL@XK`xPqjK8!UP3VO~5)l zL1bdyq|7>!gplgLkgANkR=)4x80@M0>y!fQUspL_fzvxJ*JR|PeRrxCIj+}Sd5laqI-dwVrl^? 
zmGsYy7Qtv&U=Jx#G8KR3=jGQWVZpFbzng8pn@w<4Q3TT<);l)Rc47uYUm3LWDE}?# zMMPhTFiH!~C3%B!Di*3!im!zAw|!r4qaE+?4pS)&(+1r@%mn$RIV;=`=#)97;V1P5 ziJa-6Zz_QMBfEu$H-=Y+2W4>q8EjLs8Zmg@VJwv``fdgZWjP@D3q9A6Y#=F@90|i> zzSXmvQpeutU%5J}!gJ_nZ!K^sSyCllC_-Edgj;qJC4-Tack###Os|!%nU;ePAIk5- zJgu((R$XouyaqNK-Y;SDY;e)&N-TT=rH_wy9|o`bm=FW}h0bd0J$g6`Mu7W?bA_4d z;N#xR!0t2e-XlN#Z=uSTpk0m&(Qi>OrWN@u5`L*aV6kX>4=LuYI15yPKhXauU&@!- zh8FR1qt98lzX>f_V~J|tt_F;|En_F_#r3>Lqx~c)j)a*%6(rL=x$l`9ZRK(}DjK?m zqwZnw>y`Zf6gQ{hA_G}#hQa^f#qshdylAAz`dtF|0ba=UP(yH(=fgjQ@yg1O4Zogu zx+G-N)F?VH;6Og=B{L^?0H?3e{i%3?(6X@WVQK;OSi~VX*R0CS*hLj-E8$7iF9lbg zfAqaOYL?RNr-VMv*7jFT+H|#@5TFOb z4gNkr6#rY%-9x8*m5BvC=2#|=#bTmrJx;ZX(n|b{aiZuEkUu4LTrFtVMcz^ws4iHi z+fBo`vmwJ(BxNfs*s?7SHsT{J!%ibnnXwmY%O39UL*94ZjG4~k^VprKyiOL$7ME>t zGg_q#=KeDf;Im&)TuGrHH4yzKWajmc^CO@vhWChT>SDP9;|BMq&=a(o`RffwXg;Vo zUR@`Z{3K|muEeC4Fzg*ndIjqXxhy+zGvIrvQ>EM9bY}k^ZOuTs!%s1jXMK^EQK&q+ zi?%>drZaS2D%nG?a00!i;<6^=sMzepKv|3>pGD+bo_I*^+ua4cUDKw^CDnz`G%dnv z`IxWksm96$W}E{yOb$NTp9NLCTScRhW{H4bj^a$7Jwl92zby&P>tkF%j9bE;YN-xS z9bo@)mP>${@75w;X{jhL@T(oZq?&@C zT^7|kjrY?eu6X!_v6=+I%}SzWNMF=++sEbmItx0K3T6}hfaUqr=@QJ*SgSWQ&Gx*S zVZ|rOUp*d+5+EGaFe~VU~>8g`JC*3fz zpoRi9YWuG_(HWjAU(+vup1fk_L$37xMd$PLWUNWO{q}n^T&ck(Vk{2G-OOIqyRrr zz5imUs%muzJEd#s^}&mS7kPiHyZ(%@=yvyr`N*jsY8N9T7L&Uw=;)V5oRvyaWoZqP z`;YQBAe>*9Q#HFMXsvJ(y!_3igtU&@z6HrAnE(~Cko*0%|bKwGE+ zsANnBHRToDeUe>j<(d@6`eW;>>e;$Cp_$|Z+jy`ch`XZQ)2_n1YNQ%2z>`Cv-}mR) zR9X9*oP`SF4j#TZAddCAX#|p%ry#?nVUvky@2j``D1MB8ZYi?HIW&E?Iq0o%rPVx= z%XGiY8SyAj@{o%f(0VlqBq-Zm3+ax_7RFcs8$M+JMv>24k-v|00?j&|M{$e-Gv#>P zw60*U=@G}kc4cJXhig*`^I<-@s{!)$gsW7i(TO*vKstu&Yc$(h0S@y(D@KyJOGjpq zOit~dYr>L5i|_uU#QBXcND=z`x8~fv1_nkj=c!~k@f|py08RzW3P`>x?%!T&ksN+c;YCx)`UJB{=&u0Q+Jpg_oI%>}ihP<+++!HA)X zdtmIJ5R|=RhP_uku7Gx-SA}eWS<$FyzWq{Cw*A=>MUIchD@wng`tIvwhJ&6v3*+>l zOOW^AGYv-0ZMdesNlKhU^BoZcq-^}%tjgCd0&ik zKW|ghb2oM1x??_9%;KAeDs#*seQT=(vu0`7N(i=a6v9$u< z9%cPkeCt9Sr!ls{)Tbq~Sljw@<5*Suu@!Ch*i%#p=LaW@9D-;BFEqSWqqG=QTt2&+O`sfz25H1k?^V<85bA9h~ zf>dCrTV-roGGfxqQd9r!k3eiu)&1yk;Nt`8R3~4Hy$57rcDc~LrCo4pz9FtS3=)}D zcm8I_aKq&&J-RYd=v_s{Xd3Ea{%C0kd4``*C1<{l*(!G8rHg-~jQB@ve<=w+H4j()@~EXQ0o!tTMtWrgEr%Zsw%@tXu!0Le zOK86+(?F$4l_f^b7MtMNL~&ZSEGha09{TF{i_rF8>DFVS2t+22SJl30U?KXp-~ZF|q6sVC^znRuSSu_GI?inSA{f)}@2l{`Pef+^I;m4UDc7O|{ks+Y_W+TE3*cZ84uCBZPcJdz9d^vov z-6bHqW7lye6=<*w?tvu~e2yWo@37Lfc0TV5_APL++}ElRJxI3j-~0O3oovs;jeJs!7GV&S zz^!dl8_Ww9kGIWstLcfl2Q=9p`*ii=T;G?h%yb2b*~Beaw$27gxVy}ulU$YDq~fHK zn(${*H9}Owz=b(RSCVOzAaL6A%&a8KRZi~5mR93W>9_Ps6*P!=*x+d>g~FY5UhB;P z#1pEgflZ%R_Ox=b=CdfX#!>sWAGcXcMDTNstE{Dg<+nq&wW|%>mO{Pn_Na1X2Z5KI zc+MXgSI1s7pBVoL^Wkz=PC|i;1d4=V_W1i!U6Gug^!L{!QR%!Rq>g@!uVx(Korm`8os|f#zyXgbg z3hH8jTkmw2#Zh@F+kpEv(Dz;^j(}O2hIylPhfm<{u)b{1Gyt5y3862Sy_AXvLS%j{ zJpUXg4BV~-$Yij-+u+*>rJz?5d!Mw+%Kg*J>AH^cK+<_TNj=$lh(}#kba0r(8sCcW zGdT9Qj}}SCyMX1OsENC-b=~tI*i5Gd2QjiyU5QAGt5sNR^oLOq@q6!usBrKq;*gbK z+K!2ZQu>osfEeh;MJW^0aF5@Id(JP3_!?w2&oD{TX%&s++32Pd>E4sqc}ZebU`%dA zNAzHtgp59aq{((Oat2SJWKYbbvApP)suho=I0%gdtttQ3>(gmzl3{tOJFHF2xT`Uv zz#YM;%Z+BZd%Mo71q{KiXD_B)Q)lm`x+r@=MCwD+Xhuw8 zWt?7e?eDBiKqxUqGqb^Fy}+oC@evi`_t_iEnV@$gB$_P> zsvH^9P9lU0Kd|0%Ae&qcrk2{AEfmumdsXj!`6Z$GEeh_8vIBaaM^r5H148W;YYK80 zPy#4!VG4=Bn|L>5b;c9(#vw-{JFv(U(l1%hjN{Ujl%>(ZR)*_zgY+iT;TifFu;pMk zYZ7SMLQD={=upl>HF$5Rn!%09MTfzn@c3e2(YB~i4G~@2Pb|m+R2!%)FHjYNo@HUx zNR~*Ba+**}*vp;@=D}Q3o04WnRDaRJ!iYst;0Bmlsc!974wHQEQ=Z{=ugBbeFd%W- zUg#9b)NWlgirIwzguO5NyXiEJ>?6CSNxbLUB+fcaro-6NpsPN2N0zEJ3!;S|CQedy z52gy#^8ccN4RcOf5tA0~7TUAOu1zx)<_*GhZm|;6w7!bzI1W?kF(DAU<$9lOQt8pf z1?}!vR7V@UdIiimROp+7F)_t&Gm^C$4ts#hZx1nxv8w^yI`Agr;}3Tl)sXuD>BCqo 
zDEcw9<}C)k(Gj0uzp-eYRw(tG)V0va>ViN!YQE6Xv#!%B(FZR?rRmJAYKXh|-VIWS`m|W-grkFQMwDo&svFwX}3D;c}S)nJ3hn^vGcMa@j z7Eb86uAOywWtzErK3pxXs&gZfzBF$Vo#%M7+}fRsywE|Jw?2;Z8Y1!9OcN>2+2vMh zKJQ}lx54NiQ*Ws6S4$BT-p9bnUb$GDAst9|ox+kdA+ijOwu}-9)DH5ynHmY5{42}? zE;YY>PRPdmjSsm}CqA4!MM@YN4>pl$@uVNQ5)Yi?+)Hh&7cBh+Q2m1YfWYA&G&dMq z9G0RTUH6(3WJ@WdJvc;WV6a6FF4TcobdpT;p$IiL?ingik9X7sXZk&JVPAF#wxsJ) zW=u2Ua|H}p3u2F5vJ?&mvhbKc32KnoRJT>HAXe@=zlc81qR3Ljla5`q3TrUgL7J&ndnnh9)(S0; zYDr!^Ma;d}Ng?>;qb@&1=Ge+QeFy2C%QSnuzwM$7&I~Mqp@`r3vLOn$hQGmABTuGB zyW(3@l?7TX^xs7?0e&neltv+>-+1=c>T2IosDvVmwz+vJs)+@8%<`#MbxsBI<}a0# z9t2zn#Z|qWKvO4#t3$8uR}x#|`VepFwZ*=ejE2^VV8Q&KIa%(vp=Y@#kGx;6Y4i;sxxC2PU2W@x zl@@izQ3-=eT^@og(vIh^x4wTxLN9JJa73+$Z+LE$4%@pTE-#h0B8`Q*Umv@ZM?*mY5e}Z%}j8)cIr)4Ftmz7*HfqYGcr*D>b@kY=!f?Ga2 zc2SifGZWe!1i&M2QGI{KMhAz})q-k?BqGv$Z_I$p$J0P2&u`G_?{ct}pcrkj7lIh{ zwrxx7DAoi%f$S^1No1{vmojfW{3J)Gm!dLmB%ig4D3n)7>IUta*!C;qoENJ0PlOOc z7upiTSUa;wgNkH7k&F7koyx`tjQoX1h{YYUqzJ0_V1$$pJT*4u4%0fW$_!4{4cKnq z=fCSx5zk_nEW-Tj!KADb{0x$k?@fxe9eSVt71vY8o+LH((=UI2>?pS1Z%OIQgMT^a z*)Gd8x;@-zL)2Ubk|xA4_E$>W{zaWRz1$AVNw7s~wb^vXUr!Y_ zGx5?W)wPdeLmw0zfPo-Z`gp~x8+}e5+7lI-({2aL^O2%oQ@1^&#irJ{Z14Uj=-@q_fVb(_ z$zdwFHJrDY{grFq`c8uXZd46(5H;EhqLGPt^A@8(!hHu*r2oLeNpZC z!&=x$7>5@kHOIyVFMBzD$SD2n*HMrOiZbt2h-9vG3^6J~KVC3IBJ6vBvZ(zx8Gp`C zXM|ifsU3;#!Y>syVxflw`f$O>Jtj$qQWCme$K(p5+G984S6^`oQLBnQ5;}k)^Gxp@ z)Otvx=Sr6;?wLq@wy3^0{`TW=6CGK+#cmh55)54E#afCsXUJ6k{>%#q0}3PE&5I=1 zKFw8A!oV-K>$VsYw8*ecyOba$72WWiq@FbrrAU{=H0&R#&|b%Yd;1DQ)emD|o?*bO zhPK9Coc26G9py)=f5JX{-lBl$vBhc;i=y$l4sj&pDsa`Zl!lsoObjNMAk3)8^x4x{wZR0H>v)gPo<4;j>84|Thj-;@~(nI9& zP_gF8nG{Jy`I0kqa-RhfuhZT)^9j#hE=48N8NbGqz1IC9X#`+^EaLxM4hVn;3d#^t`NKWEQ>ThIBKk9S0)!7`8R>AJ1~IIScFK zj&p1ZFv!C`9W%helai|U+m*dCprYGMAr6&Wm>^;8Nhf=|k2o|@cldlHRzF)u=);+5 z8rKd|@=k0r+Qg`KB#ih@0<}f~qsg9_^e^OM)IEt1A{sKCoGBAecCMgVqlmhT31#>V zNJ{G5m%CI<+As}lf0{}~Qd2UNsata*RXQ4UNrHHsrkATRT0Al4v1g$mBK(NvZSwF{ z2t2l>zj&+YStY|{^5Twot87ByA7`$Shlu9SFBPDq^q|32B87Y6$8$)UQCnK9AYX@B z$&*Xf?pghyv55 zYiZ>21!OZe0Q3oZDxM3FaO4k96=qaTka%Clk@X!%ADD)GDkpG^OHsi+;<*@m{W9)1 z^|d>gRCvNp9P&(!iJi~yE#lv%kfioswMM|7#LmyeGR#>GT!0+C<}oUckq7IUH6HO9 zW6;A{2*GK*N%XP#UA-c*_-j#&lNPQq45mHpl%TiG^@?7?e&vL4L8TwXXnieq2v68Y z_!I}z>6e3c@W_xO@o0>FYv}wa16W};j?|FR1+nSFMW~<#4CrZdFJG#2kFs=oQH3jK zg^TEtiP&-0&&V;f@@+QPg2ugV)8qT?r0=6Km>9A;<)0+~_|v-?_PInR3j9q5wwgxo zln8>j|DI+YZu}>y0(^WnD;MRp=DekzF%N>H$Kg*5|1J`waP}8Hl8`*4tIDztk*jh! z^ip|<#asr32v(%s*BBE0l%vu`x%ZW6V9cc8L589txfOI5PMk$?Cpj4TIxzv%YLIg> z7!pCRD=ZTm3%N(HG|ooysa(X^gQ2N(-D34>J*tKL=eBllIU|b0ShvpW*qlSz7zZ6! 
zORp7?l-~-LYo$p^z_1YE7hCB+MlA{1CIHwohX!ZwKE2Cb4_s0iMJal1KJGWjf za6?)_|D{XdZCrp6S=~Ka9Q~Ir!Aj=70L zu%Zzn5Ze1`0u*>|GTp!#1U?VH?UmM3Py9r}_DN%X5JdXN)5_9yl<(`mwC^or87pZ>c0{yaj{^_yuz{9YqH z1?WIW{Hz_qMH__c#1lNStGL^jCr(vUtX!p$RNRH7Lz{ygqsO#Vv(e( z#s*wIjo$zm9DlDUnA94?A(p7AJQ0-I+U0qD%(eK+JkwBt|EM7opToWA3FXn5s8*Ac z0urL) zkf+z0xHar|K}eXDsF01mq5##27@iHp*N$652Dch*@C=d0rq0fxV9vhbK-Q3{IzfSt zZdPvZL&w=CTmJKB2#FD#Wxv0flQ;o|!kpX;B>UF#UavTf1H+JI4)X4jP5t;7=rYZ4 zNvgiqqeIFfku^%OCNUrt1Dijnkrr^a?t(giM9@48F12p`MmWRr1E`NC?g2`B`);<< zniQ;t^-=QVIU6AzJbray}y{uNIa-OcG(ImiYDM@wgMTgn7U6Ly^+w*0ib$F zWZ2ZnV(OOS){F!U2cik#q5GfilZtIHEM&KX#y21EwABo>?Sd%4b+lN8HAVB$DWo75 z;s|kVyI_(FX}<@aT(tMkYg9MAgSR)?m&dF8jr%)UAN9zidkp7FmCHQR6JW0Kx7R)Z zslQcaombKm0R<2G6wlUrZnfbIIfXCD2M++Y2n^9HKKhQEp#UX+jP&xtmY~qC#8u>v zAdF%fgl$?Pz3L8fsEQY3Fl;{;h3p=*50eZH1N4}t@ltl5GH*yeR79rbXeqvku?@)5 z1?qb4R3lO$C-F23<%Xb|vgCEq5+=5Flhm7mJJ^%9f=I$N zEi{(w!1c2m&QMk38$#mK@+FCp^H@MrWp<9^fKNyR?*v#Bg_@b~vtzqwx;Iy_t^H8s zLh;wAtuU=HVZ+hvS-Rx9OvjZ6=6Kw2`@ z+b#Bbz81`3@z5XcJ4B0STOmm_)iPqi6Nr>F6(G;FlcnRdkNVqG*u@o8lyb2|Uc4Yx z&N-G~D*o+)Xw^*oftNkVm*64dUUkOIan*i7>bf|5v^BB#1+_KMc2(5s9C6^dg)8B1 z)^y-Cvb27st3X@DeuZa;^hh33vGEks zFiwH#1w0B(#)4oo-L`B-i-2D|Nqn#1u(Lf;QQc)Vdu*QY09rYw64~?K?ShvZg7Ei3#kZc^_eDGKfmZ8ShMIA&3}Y& zMjm`t%O9&LqE-=0d=W>j1U5%MRB0b?pZ^c+PzR0uKFo(@D^HXWgIGV)5eh^=YpSJx z;yDJ`bkZ%)hOEfqvz}KnLR%8oq(5@X(Yfg7+HsbycgWSh*usKvSto?E( z+>l5U2^|x@h30gAqPKY}(CMa6u@8m_Cj|+4c9*;3_3d#?UCf5s;GLYJ)NVn)Q7=nIv;CR02nUn?o{? zAQ+EE!aj_=r!#ZSg1I1*DO>y7^K_9qLe6xWPhbzlBlRdnoEAkVGRed8b;oU2pqf{0D$+OqsLso>XePWA zx!{w06E5P+Mz8qeFW95TxH5c&ZO8)nGug)2UBu&-hHqM?PCAFqwE~%`BtEY*TN6}- z`w=QUfK&~Y&o=!cr=zc&CT{+Jz{dKxv>@7wy&JVsgRn5tV=L@#&BL5b;wIqNWrrl; zOGa|U^rQuhv9nFgy<`I-ytpH}uQ(c+rbn>wRaDAdM ztxRV}V1OJy8`|pHQxp32Z^(H3H zoPJyyXlNH~DlfxDkl+YE2BcWi78g)HBkH00!m`Pzt)a`hED?U#BTf;oApGF0j>^c( zM25#VVLQhD5oIy%!UTNfzMadZuhQ;M$b{;cB_~OyO(>5-!ERk{ot?Ev6vv+cRU+=f zKQ)ip9kS;M?C;JnUmD!7-qibb&= z;g7ATY8%mtu8A?cZi#L!(AWq<+;Oj}1=6u&UAYb0+h>a;YAi{=o~oW(yz=Lgxe!=O z-l64h(-(s^0I{W(IQgg0Q9{zfeCVq`Fin`y4bk(F(J6W^NG_B@b=73^9k<%`iDK<}mz{Q-g zrbCiq zuF{QyOwlmf!lwa|+79u+EGD+l9jhH^Z0tppL-^pbn-@Z@I-n=6%bJ5x$wSJ_Iy$>i zTy{HyA;QxB;XJZ$6O|uy8L<6b=(dP%=2`Z>1CsIIr#jxU>!Iv<2VBfB1$!oP=Mssp zn~p(8#_N-@2T|9*T_S~I8^<+35O*Zsyd2`ZJR>ptQsv?=8C8phPA-rW>2lw7hlsvB zCV}0ZVilGtf-xyhT()?ZQWCXYs4Av{YLzaHE+ZnMdAJuQjb zYApn(6rYM}z`3pv-&_;Qp}w`kpZ6`z+ti+@4kI6|QRDMJxtC);t4tB#pEEDEhqLt1 zhkPgy2j8h;&fnQV9z!a^aMx2nD(tHFh+elm)rC)?%f4FFQT$6aI_vlinHvwY)c-zN6BtK)47T+`VJ}XnAI1IEG4zUOaF;8S42yxDZe>==%Y9Gc)joMb9Py0)eSF zDR?Fwi3;}-{TdDyJ@!Dl_h?Z6Kq$+e&d~B1#BlIgtwSUpNdwCkm)Opr-Rs2QTcgvf z7?^!f3=g|dlYPY7r`)VFKYpb$J!&oeL5yoa zg`MR&LqR9My(?enPbL-hQoNW5oI|{>#+Xz(|Na?+s$4-bw--J3i;K3=B?8XL7Otrm zvQezdnF7E!xVSQi|G_uj4*d5PLvE#RT{jN#8AkCMhg{h)G2&|2Rmg}4B?CBkBgPuu zh1>oYh74h!#x}nroP{A*N_REnIi(^AP=etS+Uel7ffH4XGA_ZZcW%{VG>#QY4 zNw?XSdojAXAd=}H87Dd>lWfYZ^vO%p>9%8FbcwrMeghKF>p+;v(jWMOm-G8h@Sg(yjbyEMA zb7*?8+P@0a=$e^te^Mb*W1iWT+WsMLi3Azmu zEE49YfM=(;_~5WYClor|a09gfK3)OSYnnHy2CkrPr{1Y>*@*GIFUY$Q&?eOM8UxOm zoYq~bR^K#&Oo}{=d_n`ucao%Na5hx=shHwl=AAkl&JznPvHh*cVMdV{W1#rKrnI45 z=>xcGI71V}o`;6CdgD_=($=C^bTrrXc_NmcmIK)89KMeiIpg@&mFx0aNlrCNc1_>R zni#dDC*x)-gSo+RE&mYMON4HV%Foin@j_DpPjf*hLyi)2q&_C3=O`mrazeCe+bjOG zQHK)MSI0aJ40e^j_LqjR^Lam`MA5(I2$<;rCZ|^6hRes`9Mpdc!YLaFR9&PX1O+}D zA0KJP#=9JVE=ve4&-bbQ6EmQC*E?u1>Eft^XR<%JR;aiF+*)aR zcruY*ohQOO)q{8s3^ul*3!HNN;cc(R2MStmr=!W8v4W*rJ!p?<@{i{pvmCy@DzmcC z70Yl1Zk9UBVZmSL;pCT*&8D^Xy@z~9q22KV-Zva=030uc(mKpyJ@ok@j z3^Y1L}j1r zwZ}Md1y0FdfB-iZxOE`yQg*GN@gLSqnVz_}h^{>@fqrNlRL5gI{?9u1>@lt%NdDf0 zqequ4urEgJ>-vY^89P0YsjqYpy2#1cWHEIR}zri8V&}Fp$2Ih)%@Piby0-)Xf 
z$DRb^28?KRlagTgM5k>E)n((U{j80MFp^CVM#fDCV^7+GCD|V3dkdenX|2bzefP^s z#<;%Q7$2mVUb(eeTmYZW7+3~1FjP8$CpRsMYk6otaJ~#6I#&|QFhosmd<8=L9#}JA zw>4{=SF2yV=!Zt(vGrv-j?i(DWW$r2y9yt4TXo0OEmRE5nv#M6Kk#sjNUQ-@S_GI+ z#yndzNwab^Ex>h_V2EpHk?|OAdC>j1%9%evS55;@6U{|d*4Yg#;Zs^xhePYIjvNT^mKq? zgAjN-UCer+Blb_S-RqJ@G5jJ3y$2at61rg(o^?t%*~u8h`>44>OBvs21J z;;nNpy+p*Im1l?7Y+_z3NbV0Pm-UwbsGZ_PJ_FCJVbo&T3n~{;*v(PXslD^ zNq+VU#z`E5A6;s^5l1^1z<~%+lJaNb++?|JgrOg!YkpdB*C|n55~sI17Z4(C(FLoH zcQGI8EB5|hus#lA5ORpOiR|TmpXWM2Z80MKTOX*@wTr9mdMmHp2*!g|D47k`>%8Q>P zDFTBkiSH2;WSzHmoi(TxGIy6@8I)vFq1dKb;?k2#@pse5CFtZ$FofW;=2=ORhD=4U z9L5frTlNJZF~4e+qKH-4LO@4FY8}>D1xE6jc#|jRT{{<$dg86m{Z?$Jm8FSHWt9Y6 zcRvx>+4C9jmeTY-`Hxz2*j0(P;3tWF79GSSFqyO>Px%e}^+>)Yb|(pd7$CZ;*Nk=U zMIU~4aS8iqgMvWy$02&FTmf+MOXKP~D+2?RdJ0$tgqy3hM!*)4eEeyl$PgN9S5V0E zH=p1Q8}0G-CGfE)m8KB!Ma9y!s%`mu4X4P5nvJJ#FS7wiOg*2#;*3u=x5cM`B^xRN zlbQ=kT}yOEpWWCQMD+bL(8n2VCV?WL)pcWZm1egl?b0uF03H&;KbWHDK?m{*3$kSb zQK!^Nx}rJiD6Z)*G7}5?cIeMd%F5o`2nO=g$y$&P>;iuw!|f)iwE;p!TE`=xG@9Jx zRqMoGKbxO7$A>^=x%D$MspoU5Wv{JqJ~HfF*wPh`3Hr)~dLi!dO6Dd}kV^aO0Ai&o z`s@q|&TW(#hnAoC})+&Usn?@kx!YHwsA(!W@I-h2fd$rlTqGGajr zsM~p)U~^}2LMQ*Q{rEBd08w2_h8AYIao$`qU^J)}Pl1g{$Ug$vQfynM8JBOggM5`0 z7)}*_95Q8heLVSrL((5gHQeETLAVg;dl)#b1LuO3oMMN{Q<);8GkPGt~o(-iXIAC`> zHIOGaFbh$od>bwF$0CA6Z=s9DKo(BSf#eO>l;zqeb}~^*6#xyFyFdbnK5fjAjExSX z+^8jWXOzfeFWjaXHlo>-6kBq5u=n-uv*U$Q=~&)(%*ycSqjUOB zTxSk2yu8M%N%mc_vpMS+u?E|1yDID`(NBs(&!?X@aKF_aHYOAM#dy^i40-Hj?{^tQ zkZH{iD%IKI$RVG40gYur^m12|FO-ke??0>C9%67FN`l_T@8H^bMdbcykyl)R$6?+= za#?FgE%r>y0ahFKitpxPSS%Sac?1h79Kgub#0X)1Z6*mNPnYUx6#XhQa6qBU-o=Ae zxKr)jA9foQ#=0EcLQQ9ZiAk-4woV7y0SCrV+jt_u5m5tQYmAou+cY`+)?hrwFlAH5yI$fhS6dyt4#*C$&XF-S$) zSl~lpkAr>|{(5GA(wU5790gbOXBqqMMoKcc$C-E{kEWU91dM!+pvyM$sc`hQXjB%N z$hETM|I^sH$3vO6aa==dq%z}tZjg{e5n5Ktn8`5FP!8j?W1L3hFbtZkZ3bgP(a223 zu|h{m8H~eQIpi2}NU3cd1}n0dY)I{U&)e>&{(S#>{<@#f_1vH5xjxVRyS~5c8lXB> z<%-71HMh4|wC-D67tWp?l8=#PL+V{8gTy_I95F#QX-1s%+`zX5Inh5MHB5~Q2#(5( z!+`=BX?jL^Ce(zN<1Vf-NUcwIB%YKC6hrdxgShw?`t z*EgQct|8Ucc#W%nLB42D%twqyM=_Knx~WD z8y{w@^c?C_iT*FS5}p^2!|GbzpVPgm9&l&dhqW0pE$>1PQ!*Zllwh$0UxXv?x#3kY z4FLuiS(SNshJ!fY&d*-a;*SoY8R4;*r%BxopFPPhWiFhQ9)TA%E_h7|OfTPi+b=u` zC8ui6bP;_`73{d+1E5Gvh1Lq}3vRM`jE%IV)FF^g1J-)&oDby~|qg#`Z4u z>2Cxx?0N@#%dpI8C@+Q5UT2fKOJL67km8?%^!G03t z*!!ydQPu;m5`U-4MV6{J^R7tJW=VR<4p8mcPbsc>gY5=>qsm=rrHS5*JuMiU-rsc& z7kf$8j;P*BC)d9rr~huKb-P{ft<}A&Q#U5G5gXHs&GjT$Nm&7%~72?3s3yE>C$B?uNoTSgZTY1?<2m!V_ku zd4C}9y}6y=QcLQp_`bNf+9qm(_uI1hfgTMMNxnEMeWkso^Eug3r-t=z%SVX>QM2ku zmV69?8L;yC>O*_~h?qnDI7+XecbZ{j`@oEBy|w<`r1mVIKh5@;h6-lkWFoQwje1$(q&Z&%(EoQNeo?SxS*;KhQTv z!-usS1?pr-$2mlkva^)zc4Bt9?Ky?aO-$DTQ=QqUMUS0gQqIv_55~7N^Z3K(&1?M) zmTs=mT}nn6@&AyXOJVD<9truI)akzn1-nHs6!W~QDG?=6Cx4G8G0Itxcz$bXDXPT9 zN;YC`X~HsEWl>{~LiW97!0-vQ`Co3+A9CE>cl(}GJR;N zGKMpqtgqEgXH|f-V*pv(m{D>bY}%if>cBFql4d#;-@|z!${aPeyt-Ph6Xb(qY_14a z&PdC$D1CBqEL%Ti+hX~sEgUtYV~BIUkxA0?PQ@-_6sb9(@exDQiC^M}reioU{yEox z8?DLl>=D%fQzzM&$ks%*13WEn@j6^nwiT42HeSgDr0) zJw6=98aCflJrCpA1xmq0V1_=724>5usRj@&Sm96pb=|jECl%UXe7bWbn5q2g z5qVg4Zohzf0OjQj8Z{Ufhfzr*6#FV;d$x+m#y7+Z_Y}VV{fnh>!AI+c?KUTZ!fGg9 zpqRDr%QImT3~ynsQ#+Ks0k@0oSglE!Jyk?`QSH8mldnbR@;4F{l@D&@><4XNgJ+ta zIb&i=RLE)G7N~fU>!sVxk+#NQK6r*bkf27^V{XAWP3%J&)6#F!uZ%PWC$vge~}%ZMeEfu75K%&nwegM{y+bfU{h{R3Qmf zO7UR_@+;0`jPo-_o69G4r&jepdId7O(Jn#*hzk73T?smVdByX8=D1Q___-e0$SQDT ze{BTLQo){EQ;Z3xiFhEBE|f~zR&{{-0JSOHJM=3(B$BT5<$>;%)^e-+JN!vgWHZZ7 zaGt;W6-W|U@zO*x8tzB;{O`~y9e_@~TdoQ#`d0)4Plp2voWiyndx6LDUuBFcQoRnn zwWxgfZLNF;D8eEqP6q#@wt)*STSQ%TG*(dlvD1n8CZs47{leSFzA1I@;2`4F#L=3* z@7C&rP`0I=$oozUJS2)(^H1&z{I|8Bx0RIZ)#%%1-!|Z;1BiHJ%5TWp@T==jBpO}? 
qq3o4+aj*Ec0jo?9@&C1XYrncr{-#^#m%ai#+hUGr7gQCJK>sgSwhj0I

literal 0
HcmV?d00001

diff --git a/images/train.svg b/images/train.svg
new file mode 100644
index 0000000..768794e
--- /dev/null
+++ b/images/train.svg
@@ -0,0 +1 @@
+[training curve: validation CER (y-axis, 0 to 1.2) vs. epoch (x-axis, 0 to 240); see README]
\ No newline at end of file
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/base.py b/models/base.py
new file mode 100644
index 0000000..a0e8f67
--- /dev/null
+++ b/models/base.py
@@ -0,0 +1,44 @@
+import torch
+import torch.nn as nn
+
+
+class MASRModel(nn.Module):
+    def __init__(self, **config):
+        super().__init__()
+        self.config = config
+
+    @classmethod
+    def load(cls, path):
+        package = torch.load(path)
+        state_dict = package["state_dict"]
+        config = package["config"]
+        m = cls(**config)
+        m.load_state_dict(state_dict)
+        return m
+
+    def to_train(self):
+        from .trainable import TrainableModel
+
+        # rebase this instance's class onto TrainableModel, which adds
+        # fit/test/save on top of the inference-only model
+        self.__class__.__bases__ = (TrainableModel,)
+        return self
+
+    def predict(self, *args):
+        raise NotImplementedError()
+
+    # -> texts: list, len(list) = B
+    def _default_decode(self, yp, yp_lens):
+        # greedy CTC decoding: argmax per frame, collapse repeats, drop blanks
+        idxs = yp.argmax(1)
+        texts = []
+        for idx, out_len in zip(idxs, yp_lens):
+            idx = idx[:out_len]
+            text = ""
+            last = None
+            for i in idx:
+                if i.item() not in (last, self.blank):
+                    text += self.vocabulary[i.item()]
+                last = i
+            texts.append(text)
+        return texts
+
+    def decode(self, *outputs):  # texts -> list of size B
+        return self._default_decode(*outputs)
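+
+
+# Usage sketch (assumes the GatedConv subclass from models/conv.py and a
+# downloaded checkpoint; see docs/train.md):
+#
+#   from models.conv import GatedConv
+#   model = GatedConv.load("pretrained/gated-conv.pth")
+#   model.to_train()  # rebases the instance onto TrainableModel (fit/test/save)
+#   model.fit("train.index", "dev.index")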
diff --git a/models/conv.py b/models/conv.py
new file mode 100644
index 0000000..9537093
--- /dev/null
+++ b/models/conv.py
@@ -0,0 +1,68 @@
+import torch
+import torch.nn as nn
+from torch.nn.utils import weight_norm
+from .base import MASRModel
+import feature
+
+
+class ConvBlock(nn.Module):
+    def __init__(self, conv, p):
+        super().__init__()
+        self.conv = conv
+        nn.init.kaiming_normal_(self.conv.weight)
+        self.conv = weight_norm(self.conv)
+        self.act = nn.GLU(1)  # GLU halves the channel dimension
+        self.dropout = nn.Dropout(p, inplace=True)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.act(x)
+        x = self.dropout(x)
+        return x
+
+
+class GatedConv(MASRModel):
+    """This is a model between Wav2letter and Gated Convnets.
+    The core block of this model is the gated convolutional network."""
+
+    def __init__(self, vocabulary, blank=0, name="masr"):
+        """vocabulary : str : string of all labels such that vocabulary[0] == ctc_blank"""
+        super().__init__(vocabulary=vocabulary, name=name, blank=blank)
+        self.blank = blank
+        self.vocabulary = vocabulary
+        self.name = name
+        output_units = len(vocabulary)
+        modules = []
+        # each ConvBlock's GLU halves the conv's output channels (500 -> 250, etc.)
+        modules.append(ConvBlock(nn.Conv1d(161, 500, 48, 2, 97), 0.2))
+
+        for i in range(7):
+            modules.append(ConvBlock(nn.Conv1d(250, 500, 7, 1), 0.3))
+
+        modules.append(ConvBlock(nn.Conv1d(250, 2000, 32, 1), 0.5))
+
+        modules.append(ConvBlock(nn.Conv1d(1000, 2000, 1, 1), 0.5))
+
+        modules.append(weight_norm(nn.Conv1d(1000, output_units, 1, 1)))
+
+        self.cnn = nn.Sequential(*modules)
+
+    def forward(self, x, lens):  # -> B * V * T
+        x = self.cnn(x)
+        # track how each convolution shortens the time axis
+        for module in self.modules():
+            if type(module) == nn.modules.Conv1d:
+                lens = (
+                    lens - module.kernel_size[0] + 2 * module.padding[0]
+                ) // module.stride[0] + 1
+        return x, lens
+
+    def predict(self, path):
+        self.eval()
+        wav = feature.load_audio(path)
+        spec = feature.spectrogram(wav)
+        spec.unsqueeze_(0)
+        x_lens = spec.size(-1)
+        out = self.cnn(spec)
+        out_len = torch.tensor([out.size(-1)])
+        text = self.decode(out, out_len)
+        self.train()
+        return text[0]
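+
+
+if __name__ == "__main__":
+    # Shape sanity check -- a sketch using an untrained model, random input and
+    # a dummy 100-character vocabulary. Run from the repo root as:
+    #   python -m models.conv
+    vocab = "_" + "".join(chr(0x4E00 + i) for i in range(99))
+    model = GatedConv(vocab)
+    x = torch.randn(2, 161, 500)  # batch of 2 spectrograms: 161 freq bins, 500 frames
+    lens = torch.tensor([500, 400])
+    y, y_lens = model(x, lens)
+    print(y.shape)  # torch.Size([2, 100, 251]): B * V * T
+    print(y_lens)   # per-utterance output lengths on the downsampled time axis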
diff --git a/models/trainable.py b/models/trainable.py
new file mode 100644
index 0000000..9fdbf47
--- /dev/null
+++ b/models/trainable.py
@@ -0,0 +1,127 @@
+from .base import MASRModel
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import data
+from tensorboardX import SummaryWriter
+from warpctc_pytorch import CTCLoss
+from tqdm import tqdm
+from Levenshtein import distance
+
+
+class TrainableModel(MASRModel):
+    def __init__(self, **config):
+        super().__init__(**config)
+
+    def save(self, path):
+        state_dict = self.state_dict()
+        config = self.config
+        package = {"state_dict": state_dict, "config": config}
+        torch.save(package, path)
+
+    def loss(self, *pred_targets):  # -> loss: scalar tensor
+        preds, targets = pred_targets
+        return self._default_loss(*preds, *targets)
+
+    def cer(self, texts, *targets):  # -> cer: float
+        return self._default_cer(texts, *targets)
+
+    def _default_loss(self, yp, yp_lens, y, y_lens):  # -> ctc_loss: scalar tensor
+        criterion = CTCLoss(size_average=True)
+        yp = yp.permute(2, 0, 1)  # B * V * T -> T * B * V
+        loss = criterion(yp, y, yp_lens, y_lens)
+        return loss
+
+    def _default_cer(self, texts, y, y_lens):  # -> cer: float
+        # Mean per-utterance character error rate: edit distance divided by
+        # reference length.
+        index = 0
+        cer = 0
+        for text, y_len in zip(texts, y_lens):
+            target = y[index : (index + y_len)]
+            target = "".join(self.vocabulary[i] for i in target)
+            cer += distance(text, target) / len(target)
+            index += y_len
+        cer /= len(y_lens)
+        return cer
+
+    def test(self, test_index, labels_path, batch_size=64):  # -> cer: float
+        self.eval()
+        # MASRDataset expects the path of the labels JSON file (see data.py),
+        # not the vocabulary string itself.
+        test_dataset = data.MASRDataset(test_index, labels_path)
+        test_loader = data.MASRDataLoader(
+            test_dataset, batch_size, shuffle=False, num_workers=16
+        )
+        test_steps = len(test_loader)
+        cer = 0
+        with torch.no_grad():
+            # MASRDataLoader yields flat 4-tuples (see _collate_fn in data.py).
+            for x, y, x_lens, y_lens in tqdm(test_loader, total=test_steps):
+                x = x.to("cuda")
+                outputs = self.forward(x, x_lens)
+                texts = self.decode(*outputs)
+                cer += self.cer(texts, y, y_lens)
+        cer /= test_steps
+        self.train()
+        return cer
+
+    def fit(
+        self,
+        train_index,
+        dev_index,
+        labels_path,
+        epochs=100,
+        train_batch_size=64,
+        lr=0.6,
+        momentum=0.8,
+        grad_clip=0.2,
+        dev_batch_size=64,
+        sorta_grad=True,
+        tensorboard=True,
+        quiet=False,
+    ):
+        self.to("cuda")
+        self.train()
+        if tensorboard:
+            writer = SummaryWriter()
+        optimizer = optim.SGD(self.parameters(), lr, momentum, nesterov=True)
+        train_dataset = data.MASRDataset(train_index, labels_path)
+        train_loader_shuffle = data.MASRDataLoader(
+            train_dataset, train_batch_size, shuffle=True, num_workers=16
+        )
+        if sorta_grad:
+            train_loader_sort = data.MASRDataLoader(
+                train_dataset, train_batch_size, shuffle=False, num_workers=16
+            )
+        train_steps = len(train_loader_shuffle)
+        gstep = 0
+        for epoch in range(epochs):
+            avg_loss = 0
+            # SortaGrad: run the first epoch in length-sorted order, shuffle
+            # afterwards.
+            if epoch == 0 and sorta_grad:
+                train_loader = train_loader_sort
+            else:
+                train_loader = train_loader_shuffle
+            for step, (x, y, x_lens, y_lens) in enumerate(train_loader):
+                x = x.to("cuda")
+                gstep += 1
+                outputs = self.forward(x, x_lens)
+                loss = self.loss(outputs, (y, y_lens))
+                optimizer.zero_grad()
+                loss.backward()
+                nn.utils.clip_grad_norm_(self.parameters(), grad_clip)
+                optimizer.step()
+                avg_loss += loss.item()
+                if not quiet:
+                    print(
+                        "[{}/{}][{}/{}]\tLoss = {}".format(
+                            epoch + 1, epochs, step + 1, train_steps, loss.item()
                        )
+                    )
+                if tensorboard:
+                    writer.add_scalar("loss/step", loss.item(), gstep)
+            cer = self.test(dev_index, labels_path, dev_batch_size)
+            avg_loss /= train_steps
+            if not quiet:
+                print("Epoch {}\t CER = {}\t".format(epoch + 1, cer))
+            if tensorboard:
+                writer.add_scalar("cer/epoch", cer, epoch + 1)
+                writer.add_scalar("loss/epoch", avg_loss, epoch + 1)
+            self.save("pretrained/{}_epoch_{}.pth".format(self.name, epoch + 1))
+
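
With the runtime base-class swap from models/base.py, launching training through this class looks roughly like the sketch below. The index and label paths follow the AISHELL layout used by train.py further down; treat them as placeholders for your own data:

    import json
    from models.conv import GatedConv

    with open("data_aishell/labels.json") as f:
        vocabulary = "".join(json.load(f))

    model = GatedConv(vocabulary).to_train()   # graft TrainableModel onto the instance
    model.fit(
        "data_aishell/train-sort.manifest",
        "data_aishell/dev.manifest",
        "data_aishell/labels.json",
    )
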
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3ba3f13
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+torch==1.0.1
+librosa
+numpy
\ No newline at end of file
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..f019e05
--- /dev/null
+++ b/train.py
@@ -0,0 +1,118 @@
+import torch
+import torch.nn as nn
+import data
+from models.conv import GatedConv
+from tqdm import tqdm
+from decoder import GreedyDecoder
+from warpctc_pytorch import CTCLoss
+import tensorboardX as tensorboard
+import torch.nn.functional as F
+import json
+
+
+def train(
+    model,
+    epochs=1000,
+    batch_size=64,
+    train_index_path="data_aishell/train-sort.manifest",
+    dev_index_path="data_aishell/dev.manifest",
+    labels_path="data_aishell/labels.json",
+    learning_rate=0.6,
+    momentum=0.8,
+    max_grad_norm=0.2,
+    weight_decay=0,
+):
+    train_dataset = data.MASRDataset(train_index_path, labels_path)
+    batches = (len(train_dataset) + batch_size - 1) // batch_size
+    dev_dataset = data.MASRDataset(dev_index_path, labels_path)
+    # SortaGrad: the unshuffled loader walks the length-sorted manifest in
+    # epoch 0; later epochs switch to the shuffled loader.
+    train_dataloader = data.MASRDataLoader(
+        train_dataset, batch_size=batch_size, num_workers=8
+    )
+    train_dataloader_shuffle = data.MASRDataLoader(
+        train_dataset, batch_size=batch_size, num_workers=8, shuffle=True
+    )
+    dev_dataloader = data.MASRDataLoader(
+        dev_dataset, batch_size=batch_size, num_workers=8
+    )
+    parameters = model.parameters()
+    optimizer = torch.optim.SGD(
+        parameters,
+        lr=learning_rate,
+        momentum=momentum,
+        nesterov=True,
+        weight_decay=weight_decay,
+    )
+    ctcloss = CTCLoss(size_average=True)
+    # lr_sched = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.985)
+    writer = tensorboard.SummaryWriter()
+    gstep = 0
+    for epoch in range(epochs):
+        epoch_loss = 0
+        if epoch > 0:
+            train_dataloader = train_dataloader_shuffle
+        # lr_sched.step()
+        lr = get_lr(optimizer)
+        writer.add_scalar("lr/epoch", lr, epoch)
+        for i, (x, y, x_lens, y_lens) in enumerate(train_dataloader):
+            x = x.to("cuda")
+            out, out_lens = model(x, x_lens)
+            out = out.transpose(0, 1).transpose(0, 2)  # B * V * T -> T * B * V for warp-ctc
+            loss = ctcloss(out, y, out_lens, y_lens)
+            optimizer.zero_grad()
+            loss.backward()
+            nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
+            optimizer.step()
+            epoch_loss += loss.item()
+            writer.add_scalar("loss/step", loss.item(), gstep)
+            gstep += 1
+            print(
+                "[{}/{}][{}/{}]\tLoss = {}".format(
+                    epoch + 1, epochs, i + 1, batches, loss.item()
+                )
+            )
+        epoch_loss = epoch_loss / batches
+        cer = evaluate(model, dev_dataloader)
+        writer.add_scalar("loss/epoch", epoch_loss, epoch)
+        writer.add_scalar("cer/epoch", cer, epoch)
+        print("Epoch {}: Loss = {}, CER = {}".format(epoch + 1, epoch_loss, cer))
+        torch.save(model, "pretrained/model_{}.pth".format(epoch))
+
+
+def get_lr(optimizer):
+    for param_group in optimizer.param_groups:
+        return param_group["lr"]
+
+
+def evaluate(model, dataloader):
+    model.eval()
+    decoder = GreedyDecoder(dataloader.dataset.labels_str)
+    cer = 0
+    print("decoding")
+    with torch.no_grad():
+        for i, (x, y, x_lens, y_lens) in tqdm(enumerate(dataloader)):
+            x = x.to("cuda")
+            outs, out_lens = model(x, x_lens)
+            outs = F.softmax(outs, 1)
+            outs = outs.transpose(1, 2)  # B * V * T -> B * T * V
+            # Split the flat target tensor back into per-utterance label lists.
+            ys = []
+            offset = 0
+            for y_len in y_lens:
+                ys.append(y[offset : offset + y_len])
+                offset += y_len
+            out_strings, out_offsets = decoder.decode(outs, out_lens)
+            y_strings = decoder.convert_to_strings(ys)
+            for pred, truth in zip(out_strings, y_strings):
+                trans, ref = pred[0], truth[0]
+                cer += decoder.cer(trans, ref) / float(len(ref))
+    cer /= len(dataloader.dataset)
+    model.train()
+    return cer
+
+
+if __name__ == "__main__":
+    with open("data_aishell/labels.json") as f:
+        vocabulary = json.load(f)
+        vocabulary = "".join(vocabulary)
+    model = GatedConv(vocabulary)
+    model.to("cuda")
+    train(model)
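
Two closing notes. First, requirements.txt above is incomplete relative to the code: train.py and models/trainable.py also import tqdm, tensorboardX, python-Levenshtein, and warpctc_pytorch, beamdecode.py needs ctcdecode, and data.py imports scipy, none of which are listed. Second, since train.py checkpoints with `torch.save(model, ...)` (a whole-module pickle), resuming or running inference on a checkpoint can look like the sketch below; the epoch number and wav path are placeholders:

    import torch

    # Whole-module pickle saved by train.py; loading requires the models
    # package to be importable from the repo root.
    model = torch.load("pretrained/model_99.pth", map_location="cpu")
    print(model.predict("test.wav"))   # greedy-decoded transcript
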