diff --git a/examples/aishell/s0/conf/ds_stage2.json b/examples/aishell/s0/conf/ds_stage2.json
deleted file mode 100644
index 49884009a..000000000
--- a/examples/aishell/s0/conf/ds_stage2.json
+++ /dev/null
@@ -1,57 +0,0 @@
-{
-  "train_micro_batch_size_per_gpu": 1,
-  "gradient_accumulation_steps": 1,
-  "steps_per_print": 100,
-  "gradient_clipping": 0.0001,
-  "fp16": {
-    "enabled": false,
-    "auto_cast": false,
-    "loss_scale": 0,
-    "initial_scale_power": 8,
-    "loss_scale_window": 1000,
-    "hysteresis": 2,
-    "min_loss_scale": 1
-  },
-  "bf16": {
-    "enabled": false
-  },
-  "zero_force_ds_cpu_optimizer": false,
-  "zero_optimization": {
-    "stage": 2,
-    "offload_optimizer": {
-      "device": "none",
-      "pin_memory": true
-    },
-    "offload_param": {
-      "device": "none",
-      "pin_memory": true
-    },
-    "allgather_partitions": true,
-    "allgather_bucket_size": 1e7,
-    "overlap_comm": true,
-    "reduce_scatter": true,
-    "reduce_bucket_size": 1e7,
-    "contiguous_gradients" : true
-  },
-  "activation_checkpointing": {
-    "partition_activations": false,
-    "cpu_checkpointing": false,
-    "contiguous_memory_optimization": false,
-    "number_checkpoints": null,
-    "synchronize_checkpoint_boundary": false,
-    "profile": true
-  },
-  "flops_profiler": {
-    "enabled": false,
-    "profile_step": 100,
-    "module_depth": -1,
-    "top_modules": 1,
-    "detailed": true,
-    "output_file": null
-  },
-  "tensorboard": {
-    "enabled": true,
-    "output_path": "tensorboard/ds_logs/",
-    "job_name": "deepspeed"
-  }
-}
diff --git a/examples/aishell/s0/conf/train_u2++_conformer_1.8B.yaml b/examples/aishell/s0/conf/train_u2++_conformer_1.8B.yaml
deleted file mode 100644
index d4de4c440..000000000
--- a/examples/aishell/s0/conf/train_u2++_conformer_1.8B.yaml
+++ /dev/null
@@ -1,115 +0,0 @@
-# network architecture
-# encoder related
-encoder: conformer
-encoder_conf:
-    output_size: 2048 # dimension of attention
-    attention_heads: 16
-    linear_units: 8192 # the number of units of position-wise feed forward
-    num_blocks: 12 # the number of encoder blocks
-    dropout_rate: 0.1
-    positional_dropout_rate: 0.1
-    attention_dropout_rate: 0.1
-    input_layer: conv2d8 # encoder input type, you can choose conv2d, conv2d6 and conv2d8
-    normalize_before: true
-    cnn_module_kernel: 8
-    use_cnn_module: True
-    activation_type: 'swish'
-    pos_enc_layer_type: 'rel_pos'
-    selfattention_layer_type: 'rel_selfattn'
-    causal: true
-    use_dynamic_chunk: true
-    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
-    use_dynamic_left_chunk: false
-
-# decoder related
-decoder: bitransformer
-decoder_conf:
-    attention_heads: 16
-    linear_units: 8192
-    num_blocks: 3
-    r_num_blocks: 3
-    dropout_rate: 0.1
-    positional_dropout_rate: 0.1
-    self_attention_dropout_rate: 0.1
-    src_attention_dropout_rate: 0.1
-
-tokenizer: char
-tokenizer_conf:
-    symbol_table_path: 'data/dict/lang_char.txt'
-    split_with_space: false
-    bpe_path: null
-    non_lang_syms_path: null
-    is_multilingual: false
-    num_languages: 1
-    special_tokens:
-        <blank>: 0
-        <unk>: 1
-        <sos>: 2
-        <eos>: 2
-
-ctc: ctc
-ctc_conf:
-    ctc_blank_id: 0
-
-cmvn: global_cmvn
-cmvn_conf:
-    cmvn_file: 'data/train/global_cmvn'
-    is_json_cmvn: true
-
-# hybrid CTC/attention
-model: asr_model
-model_conf:
-    ctc_weight: 0.3
-    lsm_weight: 0.1 # label smoothing option
-    length_normalized_loss: false
-    reverse_weight: 0.3
-
-dataset: asr
-dataset_conf:
-    filter_conf:
-        max_length: 40960
-        min_length: 0
-        token_max_length: 200
-        token_min_length: 1
-    resample_conf:
-        resample_rate: 16000
-    speed_perturb: true
-    fbank_conf:
-        num_mel_bins: 80
-        frame_shift: 10
-        frame_length: 25
-        dither: 1.0
-    spec_aug: true
-    spec_aug_conf:
-        num_t_mask: 2
-        num_f_mask: 2
-        max_t: 50
-        max_f: 10
-    spec_sub: true
-    spec_sub_conf:
-        num_t_sub: 3
-        max_t: 30
-    spec_trim: false
-    spec_trim_conf:
-        max_t: 50
-    shuffle: true
-    shuffle_conf:
-        shuffle_size: 1500
-    sort: true
-    sort_conf:
-        sort_size: 500 # sort_size should be less than shuffle_size
-    batch_conf:
-        batch_type: 'static' # static or dynamic
-        batch_size: 16
-
-grad_clip: 5
-accum_grad: 1
-max_epoch: 100
-log_interval: 100
-
-optim: adam
-optim_conf:
-    lr: 0.001
-scheduler: warmuplr # pytorch v1.1.0+ required
-scheduler_conf:
-    warmup_steps: 25000
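
Note for reviewers: for context on what the removed ds_stage2.json did, a ZeRO stage-2 JSON config like this is normally handed to deepspeed.initialize(). The sketch below is a minimal, hypothetical stand-in, not WeNet's actual entrypoint (which lives in wenet/bin/train.py and wires the config path through its own flags); the Linear model and config path here are placeholders for illustration only.

    # Minimal sketch: how a ZeRO stage-2 JSON config like the deleted
    # ds_stage2.json is typically consumed by DeepSpeed. The Linear model
    # is a hypothetical stand-in for the real 1.8B conformer.
    import deepspeed
    import torch

    model = torch.nn.Linear(80, 2048)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # deepspeed.initialize wraps the model and optimizer according to the
    # JSON config (ZeRO stage 2, bucket sizes, tensorboard logging, ...)
    # and returns an engine whose backward()/step() drive training.
    engine, optimizer, _, _ = deepspeed.initialize(
        model=model,
        optimizer=optimizer,
        config="examples/aishell/s0/conf/ds_stage2.json",
    )

Such a script would be run under the DeepSpeed launcher (e.g. deepspeed --num_gpus 1 sketch.py), since initialize() expects a distributed environment to be set up.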