Spaces:

subhankarg
/

MagpieTTS_Internal_Demo

Runtime error

App Files Files Community

MagpieTTS_Internal_Demo/examples/asr/conf/quartznet/quartznet_15x5_ru.yaml

subhankarg

Upload folder using huggingface_hub

0558aa4 verified 9 days ago

raw

history blame contribute delete

6.27 kB

	name: &name "QuartzNet15x5_ru"

	model:
	sample_rate: &sample_rate 16000
	repeat: &repeat 5
	dropout: &dropout 0.0
	separable: &separable true
	labels: &labels [ " ", "а", "б", "в", "г", "д", "е", "ж", "з", "и", "й", "к", "л", "м", "н", "о", "п",
	"р", "с", "т", "у", "ф", "х", "ц", "ч", "ш", "щ", "ъ", "ы", "ь", "э", "ю", "я" ]

	train_ds:
	manifest_filepath: golos/train/manifest.jsonl # Can be found at https://sc.link/JpD
	sample_rate: 16000
	labels: *labels
	batch_size: 32
	trim_silence: True
	max_duration: 20.0
	shuffle: True
	# tarred datasets
	is_tarred: false
	tarred_audio_filepaths: null
	shuffle_n: 2048
	# bucketing params
	bucketing_strategy: "synced_randomized"
	bucketing_batch_size: null
	parser: ru

	validation_ds:
	manifest_filepath: golos/test/crowd/crowd.jsonl # Can be found at https://sc.link/Kqr
	sample_rate: 16000
	labels: *labels
	batch_size: 32
	shuffle: False
	parser: ru

	test_ds:
	manifest_filepath: golos/test/farfield/farfield.jsonl # Can be found at https://sc.link/Kqr
	sample_rate: 16000
	labels: *labels
	batch_size: 32
	shuffle: False
	parser: ru

	preprocessor:
	_target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
	normalize: "per_feature"
	window_size: 0.02
	sample_rate: *sample_rate
	window_stride: 0.01
	window: "hann"
	features: &n_mels 64
	n_fft: 512
	frame_splicing: 1
	dither: 0.00001

	spec_augment:
	_target_: nemo.collections.asr.modules.SpectrogramAugmentation
	rect_freq: 50
	rect_masks: 2
	rect_time: 120

	encoder:
	_target_: nemo.collections.asr.modules.ConvASREncoder
	feat_in: *n_mels
	activation: relu
	conv_mask: true

	jasper:
	- dilation: [1]
	dropout: *dropout
	filters: 256
	kernel: [33]
	repeat: 1
	residual: false
	separable: *separable
	stride: [2]

	- dilation: [1]
	dropout: *dropout
	filters: 256
	kernel: [33]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 256
	kernel: [33]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 256
	kernel: [33]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 256
	kernel: [39]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 256
	kernel: [39]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 256
	kernel: [39]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 512
	kernel: [51]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 512
	kernel: [51]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 512
	kernel: [51]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 512
	kernel: [63]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 512
	kernel: [63]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 512
	kernel: [63]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 512
	kernel: [75]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 512
	kernel: [75]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: 512
	kernel: [75]
	repeat: *repeat
	residual: true
	separable: *separable
	stride: [1]

	- dilation: [2]
	dropout: *dropout
	filters: 512
	kernel: [87]
	repeat: 1
	residual: false
	separable: *separable
	stride: [1]

	- dilation: [1]
	dropout: *dropout
	filters: &enc_filters 1024
	kernel: [1]
	repeat: 1
	residual: false
	stride: [1]

	decoder:
	_target_: nemo.collections.asr.modules.ConvASRDecoder
	feat_in: *enc_filters
	num_classes: 33
	vocabulary: *labels

	optim:
	name: novograd
	# _target_: nemo.core.optim.optimizers.Novograd
	lr: .05
	# optimizer arguments
	betas: [0.8, 0.5]
	weight_decay: 0.001

	# scheduler setup
	sched:
	name: CosineAnnealing

	# pytorch lightning args
	# monitor: val_loss
	# reduce_on_plateau: false

	# Scheduler params
	warmup_steps: null
	warmup_ratio: null
	min_lr: 0.0
	last_epoch: -1

	trainer:
	devices: 1 # number of gpus
	max_epochs: 5
	max_steps: -1 # computed at runtime if not set
	num_nodes: 1
	accelerator: gpu
	strategy: ddp
	accumulate_grad_batches: 1
	enable_checkpointing: False # Provided by exp_manager
	logger: False # Provided by exp_manager
	log_every_n_steps: 1 # Interval of logging.
	val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations
	benchmark: false # needs to be false for models with variable-length speech input as it slows down training

	exp_manager:
	exp_dir: null
	name: *name
	create_tensorboard_logger: True
	create_checkpoint_callback: True
	checkpoint_callback_params:
	monitor: "val_wer"
	mode: "min"
	create_wandb_logger: False
	wandb_logger_kwargs:
	name: null
	project: null