Spaces:
Runtime error
Runtime error
| name: &name "QuartzNet15x5_ru" | |
| model: | |
| sample_rate: &sample_rate 16000 | |
| repeat: &repeat 5 | |
| dropout: &dropout 0.0 | |
| separable: &separable true | |
| labels: &labels [ " ", "а", "б", "в", "г", "д", "е", "ж", "з", "и", "й", "к", "л", "м", "н", "о", "п", | |
| "р", "с", "т", "у", "ф", "х", "ц", "ч", "ш", "щ", "ъ", "ы", "ь", "э", "ю", "я" ] | |
| train_ds: | |
| manifest_filepath: golos/train/manifest.jsonl # Can be found at https://sc.link/JpD | |
| sample_rate: 16000 | |
| labels: *labels | |
| batch_size: 32 | |
| trim_silence: True | |
| max_duration: 20.0 | |
| shuffle: True | |
| # tarred datasets | |
| is_tarred: false | |
| tarred_audio_filepaths: null | |
| shuffle_n: 2048 | |
| # bucketing params | |
| bucketing_strategy: "synced_randomized" | |
| bucketing_batch_size: null | |
| parser: ru | |
| validation_ds: | |
| manifest_filepath: golos/test/crowd/crowd.jsonl # Can be found at https://sc.link/Kqr | |
| sample_rate: 16000 | |
| labels: *labels | |
| batch_size: 32 | |
| shuffle: False | |
| parser: ru | |
| test_ds: | |
| manifest_filepath: golos/test/farfield/farfield.jsonl # Can be found at https://sc.link/Kqr | |
| sample_rate: 16000 | |
| labels: *labels | |
| batch_size: 32 | |
| shuffle: False | |
| parser: ru | |
| preprocessor: | |
| _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor | |
| normalize: "per_feature" | |
| window_size: 0.02 | |
| sample_rate: *sample_rate | |
| window_stride: 0.01 | |
| window: "hann" | |
| features: &n_mels 64 | |
| n_fft: 512 | |
| frame_splicing: 1 | |
| dither: 0.00001 | |
| spec_augment: | |
| _target_: nemo.collections.asr.modules.SpectrogramAugmentation | |
| rect_freq: 50 | |
| rect_masks: 2 | |
| rect_time: 120 | |
| encoder: | |
| _target_: nemo.collections.asr.modules.ConvASREncoder | |
| feat_in: *n_mels | |
| activation: relu | |
| conv_mask: true | |
| jasper: | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 256 | |
| kernel: [33] | |
| repeat: 1 | |
| residual: false | |
| separable: *separable | |
| stride: [2] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 256 | |
| kernel: [33] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 256 | |
| kernel: [33] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 256 | |
| kernel: [33] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 256 | |
| kernel: [39] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 256 | |
| kernel: [39] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 256 | |
| kernel: [39] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 512 | |
| kernel: [51] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 512 | |
| kernel: [51] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 512 | |
| kernel: [51] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 512 | |
| kernel: [63] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 512 | |
| kernel: [63] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 512 | |
| kernel: [63] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 512 | |
| kernel: [75] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 512 | |
| kernel: [75] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: 512 | |
| kernel: [75] | |
| repeat: *repeat | |
| residual: true | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [2] | |
| dropout: *dropout | |
| filters: 512 | |
| kernel: [87] | |
| repeat: 1 | |
| residual: false | |
| separable: *separable | |
| stride: [1] | |
| - dilation: [1] | |
| dropout: *dropout | |
| filters: &enc_filters 1024 | |
| kernel: [1] | |
| repeat: 1 | |
| residual: false | |
| stride: [1] | |
| decoder: | |
| _target_: nemo.collections.asr.modules.ConvASRDecoder | |
| feat_in: *enc_filters | |
| num_classes: 33 | |
| vocabulary: *labels | |
| optim: | |
| name: novograd | |
| # _target_: nemo.core.optim.optimizers.Novograd | |
| lr: .05 | |
| # optimizer arguments | |
| betas: [0.8, 0.5] | |
| weight_decay: 0.001 | |
| # scheduler setup | |
| sched: | |
| name: CosineAnnealing | |
| # pytorch lightning args | |
| # monitor: val_loss | |
| # reduce_on_plateau: false | |
| # Scheduler params | |
| warmup_steps: null | |
| warmup_ratio: null | |
| min_lr: 0.0 | |
| last_epoch: -1 | |
| trainer: | |
| devices: 1 # number of gpus | |
| max_epochs: 5 | |
| max_steps: -1 # computed at runtime if not set | |
| num_nodes: 1 | |
| accelerator: gpu | |
| strategy: ddp | |
| accumulate_grad_batches: 1 | |
| enable_checkpointing: False # Provided by exp_manager | |
| logger: False # Provided by exp_manager | |
| log_every_n_steps: 1 # Interval of logging. | |
| val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations | |
| benchmark: false # needs to be false for models with variable-length speech input as it slows down training | |
| exp_manager: | |
| exp_dir: null | |
| name: *name | |
| create_tensorboard_logger: True | |
| create_checkpoint_callback: True | |
| checkpoint_callback_params: | |
| monitor: "val_wer" | |
| mode: "min" | |
| create_wandb_logger: False | |
| wandb_logger_kwargs: | |
| name: null | |
| project: null | |