From 5e6e985cde71e8f7fda340baef0f835485e524c9 Mon Sep 17 00:00:00 2001 From: Anil Karaka Date: Fri, 27 Sep 2019 12:37:51 +0000 Subject: [PATCH 1/6] wandb instrumentation --- encoder/train.py | 4 ++-- encoder/visualizations.py | 4 +++- encoder_train.py | 4 +++- synthesizer/audio.py | 6 +++++- synthesizer/preprocess.py | 1 - synthesizer/train.py | 20 ++++++++++++++------ synthesizer/utils/plot.py | 7 +++++-- synthesizer_train.py | 5 +++++ vocoder/gen_wavernn.py | 9 ++++++++- vocoder/train.py | 4 +++- vocoder_train.py | 5 ++++- 11 files changed, 52 insertions(+), 17 deletions(-) diff --git a/encoder/train.py b/encoder/train.py index 071af1b9c..1a895f431 100644 --- a/encoder/train.py +++ b/encoder/train.py @@ -5,6 +5,7 @@ from utils.profiler import Profiler from pathlib import Path import torch +import wandb def sync(device: torch.device): # FIXME @@ -91,7 +92,6 @@ def train(run_id: str, clean_data_root: Path, models_dir: Path, umap_every: int, # Update visualizations # learning_rate = optimizer.param_groups[0]["lr"] vis.update(loss.item(), eer, step) - # Draw projections and save them to the backup folder if umap_every != 0 and step % umap_every == 0: print("Drawing and saving projections (step %d)" % step) @@ -121,5 +121,5 @@ def train(run_id: str, clean_data_root: Path, models_dir: Path, umap_every: int, "optimizer_state": optimizer.state_dict(), }, backup_fpath) + wandb.log({'loss': loss.item(), 'Equal error rate': eer}) profiler.tick("Extras (visualizations, saving)") - \ No newline at end of file diff --git a/encoder/visualizations.py b/encoder/visualizations.py index 980c74f95..81b20c1c1 100644 --- a/encoder/visualizations.py +++ b/encoder/visualizations.py @@ -6,6 +6,7 @@ # import webbrowser import visdom import umap +import wandb colormap = np.array([ [76, 255, 0], @@ -166,13 +167,14 @@ def draw_projections(self, embeds, utterances_per_speaker, step, out_fpath=None, plt.scatter(projected[:, 0], projected[:, 1], c=colors) plt.gca().set_aspect("equal", "datalim") plt.title("UMAP projection (step %d)" % step) + wandb.log({'projection': [wandb.Image(plt, caption="projection")]}, commit=False) if not self.disabled: self.projection_win = self.vis.matplot(plt, win=self.projection_win) if out_fpath is not None: plt.savefig(out_fpath) +# wandb.log({'projection': plt}, commit=False) plt.clf() def save(self): if not self.disabled: self.vis.save([self.env_name]) - \ No newline at end of file diff --git a/encoder_train.py b/encoder_train.py index b8740a894..e2bc16f60 100644 --- a/encoder_train.py +++ b/encoder_train.py @@ -2,6 +2,7 @@ from encoder.train import train from pathlib import Path import argparse +import wandb if __name__ == "__main__": @@ -43,5 +44,6 @@ # Run the training print_args(args, parser) + wandb.init() + wandb.config.update(args) train(**vars(args)) - \ No newline at end of file diff --git a/synthesizer/audio.py b/synthesizer/audio.py index 02de56555..ca41e3931 100644 --- a/synthesizer/audio.py +++ b/synthesizer/audio.py @@ -4,11 +4,15 @@ import tensorflow as tf from scipy import signal from scipy.io import wavfile - +import wandb def load_wav(path, sr): return librosa.core.load(path, sr=sr)[0] +def as_np_wav(wav): + wavy = wav * 32767 / max(0.01, np.max(np.abs(wav))) + return wavy.astype(np.int16) + def save_wav(wav, path, sr): wav *= 32767 / max(0.01, np.max(np.abs(wav))) #proposed by @dsmiller diff --git a/synthesizer/preprocess.py b/synthesizer/preprocess.py index b2894aa71..2a9c0bb41 100644 --- a/synthesizer/preprocess.py +++ b/synthesizer/preprocess.py @@ -222,4 +222,3 @@ 
def create_embeddings(synthesizer_root: Path, encoder_model_fpath: Path, n_proce func = partial(embed_utterance, encoder_model_fpath=encoder_model_fpath) job = Pool(n_processes).imap(func, fpaths) list(tqdm(job, "Embedding", len(fpaths), unit="utterances")) - \ No newline at end of file diff --git a/synthesizer/train.py b/synthesizer/train.py index 4fe6bbda3..03a9decfa 100644 --- a/synthesizer/train.py +++ b/synthesizer/train.py @@ -12,6 +12,7 @@ import traceback import time import os +import wandb log = infolog.log @@ -206,6 +207,7 @@ def train(log_dir, args, hparams): # Training loop while not coord.should_stop() and step < args.tacotron_train_steps: + audios = [] start_time = time.time() step, loss, opt = sess.run([global_step, model.loss, model.optimize]) time_window.append(time.time() - start_time) @@ -258,6 +260,7 @@ def train(log_dir, args, hparams): linear_loss = sum(linear_losses) / len(linear_losses) wav = audio.inv_linear_spectrogram(lin_p.T, hparams) + audios.append(wandb.Audio(audio.as_np_wav(wav), caption="eval-wave-from-linear", sample_rate=hparams.sample_rate)) audio.save_wav(wav, os.path.join(eval_wav_dir, "step-{}-eval-wave-from-linear.wav".format( step)), sr=hparams.sample_rate) @@ -288,6 +291,7 @@ def train(log_dir, args, hparams): log("Saving eval log to {}..".format(eval_dir)) # Save some log to monitor model improvement on same unseen sequence wav = audio.inv_mel_spectrogram(mel_p.T, hparams) + audios.append(wandb.Audio(audio.as_np_wav(wav), caption="eval-wave-from-mel", sample_rate=hparams.sample_rate)) audio.save_wav(wav, os.path.join(eval_wav_dir, "step-{}-eval-wave-from-mel.wav".format(step)), sr=hparams.sample_rate) @@ -298,7 +302,7 @@ def train(log_dir, args, hparams): time_string(), step, eval_loss), - max_len=t_len // hparams.outputs_per_step) + max_len=t_len // hparams.outputs_per_step, caption="eval-align") plot.plot_spectrogram(mel_p, os.path.join(eval_plot_dir, "step-{" "}-eval-mel-spectrogram.png".format( @@ -308,7 +312,7 @@ def train(log_dir, args, hparams): step, eval_loss), target_spectrogram=mel_t, - max_len=t_len) + max_len=t_len, caption="eval-mel-spectrogram") if hparams.predict_linear: plot.plot_spectrogram(lin_p, os.path.join(eval_plot_dir, @@ -317,7 +321,7 @@ def train(log_dir, args, hparams): title="{}, {}, step={}, loss={:.5f}".format( "Tacotron", time_string(), step, eval_loss), target_spectrogram=lin_t, - max_len=t_len, auto_aspect=True) + max_len=t_len, auto_aspect=True, caption="eval-linear-spectrogram") log("Eval loss for global step {}: {:.3f}".format(step, eval_loss)) log("Writing eval summary!") @@ -345,6 +349,7 @@ def train(log_dir, args, hparams): # save griffin lim inverted wav for debug (mel -> wav) wav = audio.inv_mel_spectrogram(mel_prediction.T, hparams) + audios.append(wandb.Audio(audio.as_np_wav(wav), caption="wave-from-mel", sample_rate=hparams.sample_rate)) audio.save_wav(wav, os.path.join(wav_dir, "step-{}-wave-from-mel.wav".format(step)), sr=hparams.sample_rate) @@ -355,7 +360,7 @@ def train(log_dir, args, hparams): title="{}, {}, step={}, loss={:.5f}".format("Tacotron", time_string(), step, loss), - max_len=target_length // hparams.outputs_per_step) + max_len=target_length // hparams.outputs_per_step, caption="wave-from-mel") # save real and predicted mel-spectrogram plot to disk (control purposes) plot.plot_spectrogram(mel_prediction, os.path.join(plot_dir, "step-{}-mel-spectrogram.png".format( @@ -364,7 +369,7 @@ def train(log_dir, args, hparams): time_string(), step, loss), target_spectrogram=target, - 
max_len=target_length) + max_len=target_length, caption="mel-spectrogram") log("Input at step {}: {}".format(step, sequence_to_text(input_seq))) if step % args.embedding_interval == 0 or step == args.tacotron_train_steps or step == 1: @@ -377,7 +382,10 @@ def train(log_dir, args, hparams): [char_embedding_meta], checkpoint_state.model_checkpoint_path) log("Tacotron Character embeddings have been updated on tensorboard!") - + if len(audios) > 0: + wandb.log({"audios": audios}, commit=False) + wandb.log({"loss": loss, "loss_window.average": loss_window.average}) + # wandb.tensorflow.log(stats) log("Tacotron training complete after {} global steps!".format( args.tacotron_train_steps), slack=True) return save_dir diff --git a/synthesizer/utils/plot.py b/synthesizer/utils/plot.py index f47d2713d..622ea42af 100644 --- a/synthesizer/utils/plot.py +++ b/synthesizer/utils/plot.py @@ -2,6 +2,7 @@ matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np +import wandb def split_title_line(title_text, max_words=5): @@ -12,7 +13,7 @@ def split_title_line(title_text, max_words=5): seq = title_text.split() return "\n".join([" ".join(seq[i:i + max_words]) for i in range(0, len(seq), max_words)]) -def plot_alignment(alignment, path, title=None, split_title=False, max_len=None): +def plot_alignment(alignment, path, title=None, split_title=False, max_len=None, caption="caption"): if max_len is not None: alignment = alignment[:, :max_len] @@ -34,11 +35,12 @@ def plot_alignment(alignment, path, title=None, split_title=False, max_len=None) plt.title(title) plt.ylabel("Encoder timestep") plt.tight_layout() + wandb.log({caption: [wandb.Image(plt, caption=caption)]}, commit=False) plt.savefig(path, format="png") plt.close() -def plot_spectrogram(pred_spectrogram, path, title=None, split_title=False, target_spectrogram=None, max_len=None, auto_aspect=False): +def plot_spectrogram(pred_spectrogram, path, title=None, split_title=False, target_spectrogram=None, max_len=None, auto_aspect=False, caption="caption"): if max_len is not None: target_spectrogram = target_spectrogram[:max_len] pred_spectrogram = pred_spectrogram[:max_len] @@ -72,5 +74,6 @@ def plot_spectrogram(pred_spectrogram, path, title=None, split_title=False, targ fig.colorbar(mappable=im, shrink=0.65, orientation="horizontal", ax=ax2) plt.tight_layout() + wandb.log({caption: [wandb.Image(plt, caption=caption)]}, commit=False) plt.savefig(path, format="png") plt.close() diff --git a/synthesizer_train.py b/synthesizer_train.py index 4d46bcb80..841260559 100644 --- a/synthesizer_train.py +++ b/synthesizer_train.py @@ -4,6 +4,7 @@ from synthesizer import infolog import argparse import os +import wandb def prepare_run(args): @@ -49,7 +50,11 @@ def prepare_run(args): "pairs") args = parser.parse_args() print_args(args, parser) + wandb.init() + wandb.config.update(args) + wandb.config.update({'training_step': 'synthesizer'}) log_dir, hparams = prepare_run(args) tacotron_train(args, log_dir, hparams) + wandb.save(log_dir) diff --git a/vocoder/gen_wavernn.py b/vocoder/gen_wavernn.py index 2036737f8..34b937967 100644 --- a/vocoder/gen_wavernn.py +++ b/vocoder/gen_wavernn.py @@ -1,10 +1,14 @@ from vocoder.models.fatchord_version import WaveRNN from vocoder.audio import * +import wandb +import numpy as np def gen_testset(model: WaveRNN, test_set, samples, batched, target, overlap, save_path): k = model.get_step() // 1000 + audios = [] + gen_audios = [] for i, (m, x) in enumerate(test_set, 1): if i > samples: break @@ -19,7 +23,7 @@ def 
gen_testset(model: WaveRNN, test_set, samples, batched, target, overlap, sav x = decode_mu_law(x, 2**bits, from_labels=True) else : x = label_2_float(x, bits) - + audios.append(wandb.Audio(x.astype(np.float32), caption="%dk_steps_%d_target"%(k, i), sample_rate=hp.sample_rate)) save_wav(x, save_path.joinpath("%dk_steps_%d_target.wav" % (k, i))) batch_str = "gen_batched_target%d_overlap%d" % (target, overlap) if batched else \ @@ -27,5 +31,8 @@ def gen_testset(model: WaveRNN, test_set, samples, batched, target, overlap, sav save_str = save_path.joinpath("%dk_steps_%d_%s.wav" % (k, i, batch_str)) wav = model.generate(m, batched, target, overlap, hp.mu_law) + gen_audios.append(wandb.Audio(wav.astype(np.float32), caption="%dk_steps_%d_%s"%(k, i, batch_str), sample_rate=hp.sample_rate)) save_wav(wav, save_str) + wandb.log({"audio": audios}, commit=False) + wandb.log({"generated_audio": gen_audios}, commit=False) diff --git a/vocoder/train.py b/vocoder/train.py index 8782380e9..03012a0f1 100644 --- a/vocoder/train.py +++ b/vocoder/train.py @@ -10,7 +10,7 @@ import vocoder.hparams as hp import numpy as np import time - +import wandb def train(run_id: str, syn_dir: Path, voc_dir: Path, models_dir: Path, ground_truth: bool, save_every: int, backup_every: int, force_restart: bool): @@ -112,8 +112,10 @@ def train(run_id: str, syn_dir: Path, voc_dir: Path, models_dir: Path, ground_tr f"Loss: {avg_loss:.4f} | {speed:.1f} " \ f"steps/s | Step: {k}k | " stream(msg) + wandb.log({'loss': avg_loss}) gen_testset(model, test_loader, hp.voc_gen_at_checkpoint, hp.voc_gen_batched, hp.voc_target, hp.voc_overlap, model_dir) print("") + wandb.save(str(model_dir.resolve())) diff --git a/vocoder_train.py b/vocoder_train.py index d712ffa3e..24fffcb28 100644 --- a/vocoder_train.py +++ b/vocoder_train.py @@ -2,6 +2,7 @@ from vocoder.train import train from pathlib import Path import argparse +import wandb if __name__ == "__main__": @@ -52,5 +53,7 @@ # Run the training print_args(args, parser) + wandb.init() + wandb.config.update(args) + wandb.config.update({'training_step': 'vocoder'}) train(**vars(args)) - \ No newline at end of file From 7e3013a6f76880bb7022a82d925a14e8326e290c Mon Sep 17 00:00:00 2001 From: Anil Karaka Date: Thu, 23 Jan 2020 04:22:12 +0000 Subject: [PATCH 2/6] working slack bot --- bots.py | 260 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 bots.py diff --git a/bots.py b/bots.py new file mode 100644 index 000000000..22e2542d6 --- /dev/null +++ b/bots.py @@ -0,0 +1,260 @@ +from encoder.params_model import model_embedding_size as speaker_embedding_size +from utils.argutils import print_args +from synthesizer.inference import Synthesizer +from encoder import inference as encoder +from vocoder import inference as vocoder +from pathlib import Path +import numpy as np +import librosa +import argparse +import torch +import sys +import slack, asyncio +import string, random + + +import boto3 +import logging +from botocore.exceptions import ClientError + +def gen_random_str(n): + res = ''.join(random.choices(string.ascii_uppercase + string.digits, k=n)) + return res + +def upload_file(file_name, bucket, object_name=None): + """Upload a file to an S3 bucket + + :param file_name: File to upload + :param bucket: Bucket to upload to + :param object_name: S3 object name. 
If not specified then file_name is used + :return: True if file was uploaded, else False + """ + + # If S3 object_name was not specified, use file_name + if object_name is None: + object_name = file_name + + # Upload the file + s3_client = boto3.client('s3') + try: + response = s3_client.upload_file(file_name, bucket, object_name, ExtraArgs={'ACL':'public-read'}) + except ClientError as e: + logging.error(e) + return False + return True + + +if __name__ == '__main__': + ## Info & args + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument("-e", "--enc_model_fpath", type=Path, + default="encoder/saved_models/pretrained.pt", + help="Path to a saved encoder") + parser.add_argument("-s", "--syn_model_dir", type=Path, + default="synthesizer/saved_models/logs-pretrained/", + help="Directory containing the synthesizer model") + parser.add_argument("-v", "--voc_model_fpath", type=Path, + default="vocoder/saved_models/pretrained/pretrained.pt", + help="Path to a saved vocoder") + parser.add_argument("--low_mem", action="store_true", help=\ + "If True, the memory used by the synthesizer will be freed after each use. Adds large " + "overhead but allows to save some GPU memory for lower-end GPUs.") + parser.add_argument("--no_sound", action="store_true", help=\ + "If True, audio won't be played.") + args = parser.parse_args() + print_args(args, parser) + if not args.no_sound: + import sounddevice as sd + + + ## Print some environment information (for debugging purposes) + print("Running a test of your configuration...\n") + if not torch.cuda.is_available(): + print("Your PyTorch installation is not configured to use CUDA. If you have a GPU ready " + "for deep learning, ensure that the drivers are properly installed, and that your " + "CUDA version matches your PyTorch installation. CPU-only inference is currently " + "not supported.", file=sys.stderr) + quit(-1) + device_id = torch.cuda.current_device() + gpu_properties = torch.cuda.get_device_properties(device_id) + print("Found %d GPUs available. Using GPU %d (%s) of compute capability %d.%d with " + "%.1fGb total memory.\n" % + (torch.cuda.device_count(), + device_id, + gpu_properties.name, + gpu_properties.major, + gpu_properties.minor, + gpu_properties.total_memory / 1e9)) + + + ## Load the models one by one. + print("Preparing the encoder, the synthesizer and the vocoder...") + encoder.load_model(args.enc_model_fpath) + synthesizer = Synthesizer(args.syn_model_dir.joinpath("taco_pretrained"), low_mem=args.low_mem) + vocoder.load_model(args.voc_model_fpath) + + + ## Run a test + print("Testing your configuration with small inputs.") + # Forward an audio waveform of zeroes that lasts 1 second. Notice how we can get the encoder's + # sampling rate, which may differ. + # If you're unfamiliar with digital audio, know that it is encoded as an array of floats + # (or sometimes integers, but mostly floats in this projects) ranging from -1 to 1. + # The sampling rate is the number of values (samples) recorded per second, it is set to + # 16000 for the encoder. Creating an array of length will always correspond + # to an audio of 1 second. + print("\tTesting the encoder...") + encoder.embed_utterance(np.zeros(encoder.sampling_rate)) + + # Create a dummy embedding. You would normally use the embedding that encoder.embed_utterance + # returns, but here we're going to make one ourselves just for the sake of showing that it's + # possible. 
+ embed = np.random.rand(speaker_embedding_size) + # Embeddings are L2-normalized (this isn't important here, but if you want to make your own + # embeddings it will be). + embed /= np.linalg.norm(embed) + # The synthesizer can handle multiple inputs with batching. Let's create another embedding to + # illustrate that + embeds = [embed, np.zeros(speaker_embedding_size)] + texts = ["test 1", "test 2"] + print("\tTesting the synthesizer... (loading the model will output a lot of text)") + mels = synthesizer.synthesize_spectrograms(texts, embeds) + + # The vocoder synthesizes one waveform at a time, but it's more efficient for long ones. We + # can concatenate the mel spectrograms to a single one. + mel = np.concatenate(mels, axis=1) + # The vocoder can take a callback function to display the generation. More on that later. For + # now we'll simply hide it like this: + no_action = lambda *args: None + print("\tTesting the vocoder...") + # For the sake of making this test short, we'll pass a short target length. The target length + # is the length of the wav segments that are processed in parallel. E.g. for audio sampled + # at 16000 Hertz, a target length of 8000 means that the target audio will be cut in chunks of + # 0.5 seconds which will all be generated together. The parameters here are absurdly short, and + # that has a detrimental effect on the quality of the audio. The default parameters are + # recommended in general. + vocoder.infer_waveform(mel, target=200, overlap=50, progress_callback=no_action) + + print("All test passed! You can now synthesize speech.\n\n") + + + ## Interactive speech generation + print("This is a GUI-less example of interface to SV2TTS. The purpose of this script is to " + "show how you can interface this project easily with your own. See the source code for " + "an explanation of what is happening.\n") + + print("Interactive generation loop") + num_generated = 0 + try: + # Get the reference audio filepath + message = "Reference voice: enter an audio filepath of a voice to be cloned (mp3, " \ + "wav, m4a, flac, ...):\n" + # in_fpath = Path(input(message).replace("\"", "").replace("\'", "")) + in_fpath = Path("/home/ubuntu/SFry.flac") + + + ## Computing the embedding + # First, we load the wav using the function that the speaker encoder provides. This is + # important: there is preprocessing that must be applied. + + # The following two methods are equivalent: + # - Directly load from the filepath: + preprocessed_wav = encoder.preprocess_wav(in_fpath) + # - If the wav is already loaded: + original_wav, sampling_rate = librosa.load(in_fpath) + preprocessed_wav = encoder.preprocess_wav(original_wav, sampling_rate) + print("Loaded file succesfully") + + # Then we derive the embedding. There are many functions and parameters that the + # speaker encoder interfaces. These are mostly for in-depth research. 
You will typically + # only use this function (with its default parameters): + embed = encoder.embed_utterance(preprocessed_wav) + print("Created the embedding") + + + ## Generating the spectrogram + def gen_sound(text): #text = input("Write a sentence (+-20 words) to be synthesized:\n") + print(text, "input text is") + + # The synthesizer works in batch, so you need to put your data in a list or numpy array + text = text[3:] + texts = [text] + embeds = [embed] + # If you know what the attention layer alignments are, you can retrieve them here by + # passing return_alignments=True + specs = synthesizer.synthesize_spectrograms(texts, embeds) + spec = specs[0] + print("Created the mel spectrogram") + + + ## Generating the waveform + print("Synthesizing the waveform:") + # Synthesizing the waveform is fairly straightforward. Remember that the longer the + # spectrogram, the more time-efficient the vocoder. + generated_wav = vocoder.infer_waveform(spec) + + + ## Post-generation + # There's a bug with sounddevice that makes the audio cut one second earlier, so we + # pad it. + generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode="constant") + + # Save it on the disk + fpath = gen_random_str(7) + ".wav" + print(generated_wav.dtype) + librosa.output.write_wav(fpath, generated_wav.astype(np.float32), + synthesizer.sample_rate) + print("\nSaved output as %s\n\n" % fpath) + upload_file(fpath, "wandbanil") + # return "https://wandbanil.s3.amazonaws.com/" + fpath + return fpath + + @slack.RTMClient.run_on(event='message') + async def say_hello(**payload): + data = payload['data'] + web_client = payload['web_client'] + rtm_client = payload['rtm_client'] + print("@@@@@@@@@@") + print(data.keys()) + if 'Say' in data.get('text', []) and 'user' in data: + print(data.get('text', [])) + url = gen_sound(data.get('text', [])) + channel_id = data['channel'] + thread_ts = data['ts'] + user = data['user'] + # await web_client.chat_postMessage( + # channel=channel_id, + # text=f"Hi <@{user}>! " + url, + # thread_ts=thread_ts + # ) + await web_client.files_upload( + channels=channel_id, + file=url, + title="Stephen Fry Says", + filetype='wav', + thread_ts=thread_ts + ) + elif 'user' in data and 'display_as_bot' not in data: + channel_id = data['channel'] + thread_ts = data['ts'] + user = data['user'] + await web_client.chat_postMessage( + channel=channel_id, + text=f"Hi <@{user}>! 
If you want to use this bot, use the command Say and type something that you want to hear in Stephen Fry's voice", + thread_ts=thread_ts + ) + + slack_token = "xoxb-18246647890-807213202769-Zl3pNJarAHWzEnveBos8T4wE" + loop = asyncio.get_event_loop() + rtm_client = slack.RTMClient(token=slack_token, run_async=True, loop=loop) + loop.run_until_complete(rtm_client.start()) + + + + + except Exception as e: + print("Caught exception: %s" % repr(e)) + print("Restarting\n") + From 74c5a8f2c8c834c86c1eb93e1a2cc7200790dc5d Mon Sep 17 00:00:00 2001 From: Anil Karaka Date: Mon, 27 Jan 2020 02:56:23 +0000 Subject: [PATCH 3/6] working slack bot --- bots.py | 81 ++++++++++++++++----------------------------------------- 1 file changed, 23 insertions(+), 58 deletions(-) diff --git a/bots.py b/bots.py index 22e2542d6..072f345b8 100644 --- a/bots.py +++ b/bots.py @@ -12,12 +12,25 @@ import slack, asyncio import string, random +import tweepy +from config import create_api +import time import boto3 import logging from botocore.exceptions import ClientError +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +api = create_api() +since_id = 1 + def gen_random_str(n): + """ + Helper function to generate a random string to use + as a file name of the audio generated. + """ res = ''.join(random.choices(string.ascii_uppercase + string.digits, k=n)) return res @@ -96,57 +109,8 @@ def upload_file(file_name, bucket, object_name=None): vocoder.load_model(args.voc_model_fpath) - ## Run a test - print("Testing your configuration with small inputs.") - # Forward an audio waveform of zeroes that lasts 1 second. Notice how we can get the encoder's - # sampling rate, which may differ. - # If you're unfamiliar with digital audio, know that it is encoded as an array of floats - # (or sometimes integers, but mostly floats in this projects) ranging from -1 to 1. - # The sampling rate is the number of values (samples) recorded per second, it is set to - # 16000 for the encoder. Creating an array of length will always correspond - # to an audio of 1 second. - print("\tTesting the encoder...") - encoder.embed_utterance(np.zeros(encoder.sampling_rate)) - - # Create a dummy embedding. You would normally use the embedding that encoder.embed_utterance - # returns, but here we're going to make one ourselves just for the sake of showing that it's - # possible. - embed = np.random.rand(speaker_embedding_size) - # Embeddings are L2-normalized (this isn't important here, but if you want to make your own - # embeddings it will be). - embed /= np.linalg.norm(embed) - # The synthesizer can handle multiple inputs with batching. Let's create another embedding to - # illustrate that - embeds = [embed, np.zeros(speaker_embedding_size)] - texts = ["test 1", "test 2"] - print("\tTesting the synthesizer... (loading the model will output a lot of text)") - mels = synthesizer.synthesize_spectrograms(texts, embeds) - - # The vocoder synthesizes one waveform at a time, but it's more efficient for long ones. We - # can concatenate the mel spectrograms to a single one. - mel = np.concatenate(mels, axis=1) - # The vocoder can take a callback function to display the generation. More on that later. For - # now we'll simply hide it like this: - no_action = lambda *args: None - print("\tTesting the vocoder...") - # For the sake of making this test short, we'll pass a short target length. The target length - # is the length of the wav segments that are processed in parallel. E.g. 
for audio sampled - # at 16000 Hertz, a target length of 8000 means that the target audio will be cut in chunks of - # 0.5 seconds which will all be generated together. The parameters here are absurdly short, and - # that has a detrimental effect on the quality of the audio. The default parameters are - # recommended in general. - vocoder.infer_waveform(mel, target=200, overlap=50, progress_callback=no_action) + - print("All test passed! You can now synthesize speech.\n\n") - - - ## Interactive speech generation - print("This is a GUI-less example of interface to SV2TTS. The purpose of this script is to " - "show how you can interface this project easily with your own. See the source code for " - "an explanation of what is happening.\n") - - print("Interactive generation loop") - num_generated = 0 try: # Get the reference audio filepath message = "Reference voice: enter an audio filepath of a voice to be cloned (mp3, " \ @@ -165,7 +129,7 @@ def upload_file(file_name, bucket, object_name=None): # - If the wav is already loaded: original_wav, sampling_rate = librosa.load(in_fpath) preprocessed_wav = encoder.preprocess_wav(original_wav, sampling_rate) - print("Loaded file succesfully") + print("Loaded Stephen Fry reference file succesfully") # Then we derive the embedding. There are many functions and parameters that the # speaker encoder interfaces. These are mostly for in-depth research. You will typically @@ -211,15 +175,16 @@ def gen_sound(text): #text = input("Write a sentence (+-20 words) to be synthesi # return "https://wandbanil.s3.amazonaws.com/" + fpath return fpath + + ########################################### + ####### SLACK DM BOT ###################### + ########################################### @slack.RTMClient.run_on(event='message') async def say_hello(**payload): data = payload['data'] web_client = payload['web_client'] rtm_client = payload['rtm_client'] - print("@@@@@@@@@@") - print(data.keys()) if 'Say' in data.get('text', []) and 'user' in data: - print(data.get('text', [])) url = gen_sound(data.get('text', [])) channel_id = data['channel'] thread_ts = data['ts'] @@ -246,14 +211,14 @@ async def say_hello(**payload): thread_ts=thread_ts ) - slack_token = "xoxb-18246647890-807213202769-Zl3pNJarAHWzEnveBos8T4wE" + my_slack_token = "your slack user bot token" loop = asyncio.get_event_loop() - rtm_client = slack.RTMClient(token=slack_token, run_async=True, loop=loop) + rtm_client = slack.RTMClient(token=my_slack_token, run_async=True, loop=loop) loop.run_until_complete(rtm_client.start()) - - + print("Loop Started") + except Exception as e: print("Caught exception: %s" % repr(e)) print("Restarting\n") From e8269eff52520283efc9e0bc2a3d076a57cb0ce8 Mon Sep 17 00:00:00 2001 From: Anil Karaka Date: Mon, 27 Jan 2020 03:03:58 +0000 Subject: [PATCH 4/6] conda environment file --- environment.yml | 204 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 environment.yml diff --git a/environment.yml b/environment.yml new file mode 100644 index 000000000..ba5403bf3 --- /dev/null +++ b/environment.yml @@ -0,0 +1,204 @@ +name: wandb +channels: + - pytorch + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - asn1crypto=1.2.0=py37_0 + - attrs=19.3.0=py_0 + - backcall=0.1.0=py37_0 + - blas=1.0=mkl + - bleach=3.1.0=py_0 + - boto3=1.10.39=py_0 + - botocore=1.13.39=py_0 + - ca-certificates=2019.11.27=0 + - certifi=2019.11.28=py37_0 + - cffi=1.12.3=py37h2e261b9_0 + - cryptography=2.8=py37h1ba5d50_0 + - cudatoolkit=10.0.130=0 + - 
dbus=1.13.12=h746ee38_0 + - decorator=4.4.1=py_0 + - defusedxml=0.6.0=py_0 + - docutils=0.15.2=py37_0 + - entrypoints=0.3=py37_0 + - expat=2.2.6=he6710b0_0 + - fontconfig=2.13.0=h9420a91_0 + - freetype=2.9.1=h8a8886c_1 + - glib=2.63.1=h5a9c865_0 + - gmp=6.1.2=h6c8ec71_1 + - gst-plugins-base=1.14.0=hbbd80ab_1 + - gstreamer=1.14.0=hb453b48_1 + - icu=58.2=h9c2bf20_1 + - idna=2.8=py37_0 + - importlib_metadata=1.3.0=py37_0 + - intel-openmp=2019.4=243 + - ipykernel=5.1.3=py37h39e3cac_0 + - ipython=7.10.2=py37h39e3cac_0 + - ipython_genutils=0.2.0=py37_0 + - ipywidgets=7.5.1=py_0 + - jedi=0.15.1=py37_0 + - jinja2=2.10.3=py_0 + - jmespath=0.9.4=py_0 + - jpeg=9b=h024ee3a_2 + - jsonschema=3.2.0=py37_0 + - jupyter=1.0.0=py37_7 + - jupyter_client=5.3.4=py37_0 + - jupyter_console=6.0.0=py37_0 + - jupyter_core=4.6.1=py37_0 + - libedit=3.1.20181209=hc058e9b_0 + - libffi=3.2.1=hd88cf55_4 + - libgcc-ng=9.1.0=hdf63c60_0 + - libgfortran-ng=7.3.0=hdf63c60_0 + - libpng=1.6.37=hbc83047_0 + - libsodium=1.0.16=h1bed415_0 + - libstdcxx-ng=9.1.0=hdf63c60_0 + - libtiff=4.0.10=h2733197_2 + - libuuid=1.0.3=h1bed415_2 + - libxcb=1.13=h1bed415_1 + - libxml2=2.9.9=hea5a465_1 + - markupsafe=1.1.1=py37h7b6447c_0 + - mistune=0.8.4=py37h7b6447c_0 + - mkl=2019.4=243 + - mkl-service=2.3.0=py37he904b0f_0 + - mkl_fft=1.0.14=py37ha843d7b_0 + - mkl_random=1.1.0=py37hd6b4f25_0 + - more-itertools=8.0.2=py_0 + - nbconvert=5.6.1=py37_0 + - nbformat=4.4.0=py37_0 + - ncurses=6.1=he6710b0_1 + - ninja=1.9.0=py37hfd86e86_0 + - notebook=6.0.2=py37_0 + - numpy=1.17.2=py37haad9e8e_0 + - numpy-base=1.17.2=py37hde5b4d6_0 + - olefile=0.46=py37_0 + - openssl=1.1.1d=h7b6447c_3 + - pandoc=2.2.3.2=0 + - pandocfilters=1.4.2=py37_1 + - parso=0.5.2=py_0 + - pcre=8.43=he6710b0_0 + - pexpect=4.7.0=py37_0 + - pickleshare=0.7.5=py37_0 + - pillow=6.1.0=py37h34e0f95_0 + - pip=19.2.3=py37_0 + - prometheus_client=0.7.1=py_0 + - prompt_toolkit=2.0.10=py_0 + - ptyprocess=0.6.0=py37_0 + - pycparser=2.19=py37_0 + - pygments=2.5.2=py_0 + - pyopenssl=19.1.0=py37_0 + - pyqt=5.9.2=py37h05f1152_2 + - pyrsistent=0.15.6=py37h7b6447c_0 + - pysocks=1.7.1=py37_0 + - python=3.7.4=h265db76_1 + - python-dateutil=2.8.0=py37_0 + - pyzmq=18.1.0=py37he6710b0_0 + - qt=5.9.7=h5867ecd_1 + - qtconsole=4.6.0=py_0 + - readline=7.0=h7b6447c_5 + - s3transfer=0.2.1=py37_0 + - send2trash=1.5.0=py37_0 + - setuptools=41.2.0=py37_0 + - sip=4.19.8=py37hf484d3e_0 + - six=1.12.0=py37_0 + - sqlite=3.29.0=h7b6447c_0 + - terminado=0.8.3=py37_0 + - testpath=0.4.4=py_0 + - tk=8.6.8=hbc83047_0 + - tornado=6.0.3=py37h7b6447c_0 + - traitlets=4.3.3=py37_0 + - urllib3=1.25.7=py37_0 + - wcwidth=0.1.7=py37_0 + - webencodings=0.5.1=py37_1 + - wheel=0.33.6=py37_0 + - widgetsnbextension=3.5.1=py37_0 + - xz=5.2.4=h14c3975_4 + - zeromq=4.3.1=he6710b0_3 + - zipp=0.6.0=py_0 + - zlib=1.2.11=h7b6447c_3 + - zstd=1.3.7=h0b5b093_0 + - pytorch=1.2.0=py3.7_cuda10.0.130_cudnn7.6.2_0 + - torchvision=0.4.0=py37_cu100 + - pip: + - absl-py==0.8.0 + - aiohttp==3.6.2 + - argh==0.26.2 + - astor==0.8.0 + - async-timeout==3.0.1 + - audioread==2.1.8 + - awscli==1.16.309 + - chardet==3.0.4 + - click==7.0 + - colorama==0.4.1 + - configparser==4.0.2 + - cycler==0.10.0 + - dill==0.3.0 + - docker-pycreds==0.4.0 + - gast==0.3.2 + - gitdb2==2.0.5 + - gitpython==3.0.2 + - google-pasta==0.1.7 + - gql==0.1.0 + - graphql-core==2.2.1 + - grpcio==1.23.0 + - h5py==2.10.0 + - inflect==2.1.0 + - joblib==0.13.2 + - jsonpatch==1.24 + - jsonpointer==2.0 + - keras-applications==1.0.8 + - keras-preprocessing==1.1.0 + - kiwisolver==1.1.0 + - 
librosa==0.7.0 + - llvmlite==0.29.0 + - markdown==3.1.1 + - matplotlib==3.1.1 + - multidict==4.7.2 + - multiprocess==0.70.8 + - numba==0.45.1 + - nvidia-ml-py3==7.352.0 + - oauthlib==3.1.0 + - pathtools==0.1.2 + - promise==2.2.1 + - protobuf==3.9.2 + - psutil==5.6.3 + - pyasn1==0.4.8 + - pyparsing==2.4.2 + - pyqt5==5.13.1 + - pyqt5-sip==4.19.19 + - pyyaml==5.1.2 + - requests==2.22.0 + - requests-oauthlib==1.3.0 + - resampy==0.2.2 + - rsa==3.4.2 + - rx==1.6.1 + - scikit-learn==0.21.3 + - scipy==1.3.1 + - sentry-sdk==0.12.2 + - shortuuid==0.5.0 + - slackbot==0.5.6 + - slackclient==2.5.0 + - slacker==0.13.0 + - smmap2==2.0.5 + - sounddevice==0.3.14 + - soundfile==0.10.2 + - subprocess32==3.5.4 + - tensorboard==1.14.0 + - tensorflow-estimator==1.14.0 + - tensorflow-gpu==1.14.0 + - termcolor==1.1.0 + - torch==1.2.0 + - torchfile==0.1.0 + - tqdm==4.36.1 + - tweepy==3.8.0 + - umap-learn==0.3.10 + - unidecode==1.1.1 + - visdom==0.1.8.9 + - wandb==0.8.12 + - watchdog==0.9.0 + - webrtcvad==2.0.10 + - websocket-client==0.44.0 + - werkzeug==0.16.0 + - wrapt==1.11.2 + - yarl==1.4.2 +prefix: /home/ubuntu/anaconda3/envs/wandb + From 05a49f01d6c1ee5e0e914e842a585b7594cff0aa Mon Sep 17 00:00:00 2001 From: Anil Karaka Date: Mon, 27 Jan 2020 03:05:45 +0000 Subject: [PATCH 5/6] working twitter bot --- twitterbot.py | 216 +++++++++++++++++++++++++++++++++++++++++++++++ twitterconfig.py | 26 ++++++ 2 files changed, 242 insertions(+) create mode 100644 twitterbot.py create mode 100644 twitterconfig.py diff --git a/twitterbot.py b/twitterbot.py new file mode 100644 index 000000000..5e1e2bfd4 --- /dev/null +++ b/twitterbot.py @@ -0,0 +1,216 @@ +from encoder.params_model import model_embedding_size as speaker_embedding_size +from utils.argutils import print_args +from synthesizer.inference import Synthesizer +from encoder import inference as encoder +from vocoder import inference as vocoder +from pathlib import Path +import numpy as np +import librosa +import argparse +import torch +import sys +import slack, asyncio +import string, random + +import tweepy +from twitterconfig import create_api + +import time, datetime +import boto3 +import logging +from botocore.exceptions import ClientError + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger() + +api = create_api() +since_id = 1 + +def gen_random_str(n): + """ + Helper function to generate random string to use as a filename of the audio file generated + """ + res = ''.join(random.choices(string.ascii_uppercase + string.digits, k=n)) + return res + +def upload_file(file_name, bucket, object_name=None): + """Upload a file to an S3 bucket + + :param file_name: File to upload + :param bucket: Bucket to upload to + :param object_name: S3 object name. 
If not specified then file_name is used + :return: True if file was uploaded, else False + """ + + # If S3 object_name was not specified, use file_name + if object_name is None: + object_name = file_name + + # Upload the file + s3_client = boto3.client('s3') + try: + response = s3_client.upload_file(file_name, bucket, object_name, ExtraArgs={'ACL':'public-read'}) + except ClientError as e: + logging.error(e) + return False + return True + + +if __name__ == '__main__': + ## Info & args + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument("-e", "--enc_model_fpath", type=Path, + default="encoder/saved_models/pretrained.pt", + help="Path to a saved encoder") + parser.add_argument("-s", "--syn_model_dir", type=Path, + default="synthesizer/saved_models/logs-pretrained/", + help="Directory containing the synthesizer model") + parser.add_argument("-v", "--voc_model_fpath", type=Path, + default="vocoder/saved_models/pretrained/pretrained.pt", + help="Path to a saved vocoder") + parser.add_argument("--low_mem", action="store_true", help=\ + "If True, the memory used by the synthesizer will be freed after each use. Adds large " + "overhead but allows to save some GPU memory for lower-end GPUs.") + parser.add_argument("--no_sound", action="store_true", help=\ + "If True, audio won't be played.") + args = parser.parse_args() + print_args(args, parser) + if not args.no_sound: + import sounddevice as sd + + + ## Print some environment information (for debugging purposes) + print("Running a test of your configuration...\n") + if not torch.cuda.is_available(): + print("Your PyTorch installation is not configured to use CUDA. If you have a GPU ready " + "for deep learning, ensure that the drivers are properly installed, and that your " + "CUDA version matches your PyTorch installation. CPU-only inference is currently " + "not supported.", file=sys.stderr) + quit(-1) + device_id = torch.cuda.current_device() + gpu_properties = torch.cuda.get_device_properties(device_id) + print("Found %d GPUs available. Using GPU %d (%s) of compute capability %d.%d with " + "%.1fGb total memory.\n" % + (torch.cuda.device_count(), + device_id, + gpu_properties.name, + gpu_properties.major, + gpu_properties.minor, + gpu_properties.total_memory / 1e9)) + + + ## Load the models one by one. + print("Preparing the encoder, the synthesizer and the vocoder...") + encoder.load_model(args.enc_model_fpath) + synthesizer = Synthesizer(args.syn_model_dir.joinpath("taco_pretrained"), low_mem=args.low_mem) + vocoder.load_model(args.voc_model_fpath) + + + + try: + # Get the reference audio filepath + message = "Reference voice: enter an audio filepath of a voice to be cloned (mp3, " \ + "wav, m4a, flac, ...):\n" + # in_fpath = Path(input(message).replace("\"", "").replace("\'", "")) + in_fpath = Path("/home/ubuntu/SFry.flac") + + + ## Computing the embedding + # First, we load the wav using the function that the speaker encoder provides. This is + # important: there is preprocessing that must be applied. + + # The following two methods are equivalent: + # - Directly load from the filepath: + preprocessed_wav = encoder.preprocess_wav(in_fpath) + # - If the wav is already loaded: + original_wav, sampling_rate = librosa.load(in_fpath) + preprocessed_wav = encoder.preprocess_wav(original_wav, sampling_rate) + print("Loaded Stephen Fry reference file succesfully") + + # Then we derive the embedding. There are many functions and parameters that the + # speaker encoder interfaces. 
These are mostly for in-depth research. You will typically + # only use this function (with its default parameters): + embed = encoder.embed_utterance(preprocessed_wav) + print("Created the embedding") + + + ## Generating the spectrogram + def gen_sound(text): #text = input("Write a sentence (+-20 words) to be synthesized:\n") + print(text, "input text is") + + # The synthesizer works in batch, so you need to put your data in a list or numpy array + text = ' '.join(text.split()[2:]) + texts = [text] + embeds = [embed] + # If you know what the attention layer alignments are, you can retrieve them here by + # passing return_alignments=True + specs = synthesizer.synthesize_spectrograms(texts, embeds) + spec = specs[0] + print("Created the mel spectrogram") + + + ## Generating the waveform + print("Synthesizing the waveform:") + # Synthesizing the waveform is fairly straightforward. Remember that the longer the + # spectrogram, the more time-efficient the vocoder. + generated_wav = vocoder.infer_waveform(spec) + + + ## Post-generation + # There's a bug with sounddevice that makes the audio cut one second earlier, so we + # pad it. + generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode="constant") + + # Save it on the disk + fpath = gen_random_str(7) + ".wav" + print(generated_wav.dtype) + librosa.output.write_wav(fpath, generated_wav.astype(np.float32), + synthesizer.sample_rate) + print("\nSaved output as %s\n\n" % fpath) + upload_file(fpath, "wandbanil") + # return "https://wandbanil.s3.amazonaws.com/" + fpath + return fpath + + + ##################################### + ### TWITTER MENTIONS BOT ############ + ##################################### + now = datetime.datetime.now() + def check_mentions(api, keywords, since_id): + logger.info("Retrieving mentions") + print("Retrieving mentions") + print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") + new_since_id = since_id + for tweet in tweepy.Cursor(api.mentions_timeline, + since_id=since_id).items(): + # print(tweet) + print(tweet.text.lower()) + new_since_id = max(tweet.id, new_since_id) + if tweet.in_reply_to_status_id is not None: + continue + print(tweet.created_at < now) + print(tweet.created_at) + print(now) + if tweet.created_at < now: + continue + if any(keyword in tweet.text.lower() for keyword in keywords): + logger.info(f"Answering to {tweet.user.name}") + url = gen_sound(tweet.text) + api.update_status( + status="https://wandbanil.s3.amazonaws.com/" + url, + in_reply_to_status_id=tweet.id, + ) + return new_since_id + while True: + since_id = check_mentions(api, ["say"], since_id) + logger.info("Waiting...") + print("Waiting...") + time.sleep(60) + + + except Exception as e: + print("Caught exception: %s" % repr(e)) + print("Restarting\n") + diff --git a/twitterconfig.py b/twitterconfig.py new file mode 100644 index 000000000..7e88355e1 --- /dev/null +++ b/twitterconfig.py @@ -0,0 +1,26 @@ +import tweepy +import logging +import os + +logger = logging.getLogger() + +def create_api(): + """ + Use your secret twitter keys here instead. 
These are fake + """ + consumer_key = "mB99fjW7s4vL61JYe" # os.getenv("CONSUMER_KEY") + consumer_secret = "5cm6xk8VC2ZkMi28HioS3oletFEEzJGUbGw" # os.getenv("CONSUMER_SECRET") + access_token = "236084SuIM4ujFN8F" # os.getenv("ACCESS_TOKEN") + access_token_secret = "YZNJHPUbOhfJ5KkBswYfriP6s6KHU" # os.getenv("ACCESS_TOKEN_SECRET") + + auth = tweepy.OAuthHandler(consumer_key, consumer_secret) + auth.set_access_token(access_token, access_token_secret) + api = tweepy.API(auth, wait_on_rate_limit=True, + wait_on_rate_limit_notify=True) + try: + api.verify_credentials() + except Exception as e: + logger.error("Error creating API", exc_info=True) + raise e + logger.info("API created") + return api From 763ccf757347062439ca577a35bd2ac02ff77771 Mon Sep 17 00:00:00 2001 From: Anil Karaka Date: Thu, 12 Mar 2020 23:29:47 +0000 Subject: [PATCH 6/6] commit untracked files --- demo_cli.py | 8 ++++---- server.py | 28 ++++++++++++++++++++++++++++ slackbot.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 server.py create mode 100644 slackbot.py diff --git a/demo_cli.py b/demo_cli.py index 57bb001c0..21e038d8b 100644 --- a/demo_cli.py +++ b/demo_cli.py @@ -167,9 +167,9 @@ generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode="constant") # Play the audio (non-blocking) - if not args.no_sound: - sd.stop() - sd.play(generated_wav, synthesizer.sample_rate) + # if not args.no_sound: + # sd.stop() + # sd.play(generated_wav, synthesizer.sample_rate) # Save it on the disk fpath = "demo_output_%02d.wav" % num_generated @@ -183,4 +183,4 @@ except Exception as e: print("Caught exception: %s" % repr(e)) print("Restarting\n") - \ No newline at end of file + diff --git a/server.py b/server.py new file mode 100644 index 000000000..2afa89e31 --- /dev/null +++ b/server.py @@ -0,0 +1,28 @@ +from http.server import HTTPServer, BaseHTTPRequestHandler + +from io import BytesIO + + +class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): + + def do_GET(self): + self.send_response(200) + self.end_headers() + self.wfile.write(b'Hello, world!') + + def do_POST(self): + content_length = int(self.headers['Content-Length']) + print("we re in post") + body = self.rfile.read(content_length) + print(body) + self.send_response(200) + self.end_headers() + response = BytesIO() + response.write(b'This is POST request. 
') + response.write(b'Received: ') + response.write(body) + self.wfile.write(response.getvalue()) + + +httpd = HTTPServer(('localhost', 8000), SimpleHTTPRequestHandler) +httpd.serve_forever() diff --git a/slackbot.py b/slackbot.py new file mode 100644 index 000000000..9c2a72b98 --- /dev/null +++ b/slackbot.py @@ -0,0 +1,29 @@ +import os +import time +import re +from slackclient import SlackClient + +# instantiate Slack client +slack_client = SlackClient("xoxb-18246647890-807213202769-Zl3pNJarAHWzEnveBos8T4wE") +# starterbot's user ID in Slack: value is assigned after the bot starts up +starterbot_id = None + +# constants +RTM_READ_DELAY = 1 # 1 second delay between reading from RTM +EXAMPLE_COMMAND = "do" +MENTION_REGEX = "^<@(|[WU].+?)>(.*)" + + + +if __name__ == "__main__": + if slack_client.rtm_connect(with_team_state=False): + print("Starter Bot connected and running!") + # Read bot's user ID by calling Web API method `auth.test` + starterbot_id = slack_client.api_call("auth.test")["user_id"] + while True: + command, channel = parse_bot_commands(slack_client.rtm_read()) + if command: + handle_command(command, channel) + time.sleep(RTM_READ_DELAY) + else: + print("Connection failed. Exception traceback printed above.")