{
  "_name_or_path": "zai-org/GLM-ASR-Nano-2512",
  "model_type": "glmasr",
  "architectures": [
    "GlmasrModel"
  ],
  "auto_map": {
    "AutoConfig": "configuration_glmasr.GlmasrConfig",
    "AutoModelForCausalLM": "modeling_glmasr.GlmasrModel"
  },
  "torch_dtype": "bfloat16",
  "attn_implementation": "flash_attention_2",
  "lm_config": {
    "architectures": [
      "LlamaForCausalLM"
    ],
    "do_sample": false,
    "eos_token_id": [
      59246,
      59253,
      59255
    ],
    "hidden_act": "silu",
    "hidden_size": 2048,
    "initializer_range": 0.02,
    "intermediate_size": 6144,
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 8192,
    "min_length": 0,
    "model_type": "llama",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 16,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 28,
    "num_key_value_heads": 4,
    "num_return_sequences": 1,
    "pad_token_id": 59260,
    "return_dict": true,
    "rms_norm_eps": 1e-05,
    "rope_dim": 128,
    "rope_theta": 10000.0,
    "torch_dtype": "float16",
    "typical_p": 1.0,
    "vocab_size": 59264
  },
  "whisper_config": {
    "activation_function": "gelu",
    "architectures": [
      "WhisperForConditionalGeneration"
    ],
    "begin_suppress_tokens": [
      220,
      50257
    ],
    "bos_token_id": 50257,
    "chunk_size_feed_forward": 0,
    "classifier_proj_size": 256,
    "d_model": 1280,
    "decoder_attention_heads": 20,
    "decoder_ffn_dim": 5120,
    "decoder_layerdrop": 0.0,
    "decoder_layers": 32,
    "decoder_start_token_id": 50258,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "dropout": 0.0,
    "early_stopping": false,
    "encoder_attention_heads": 20,
    "encoder_ffn_dim": 5120,
    "encoder_layerdrop": 0.0,
    "encoder_layers": 32,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": 50257,
    "init_std": 0.02,
    "is_decoder": false,
    "is_encoder_decoder": true,
    "length_penalty": 1.0,
    "mask_feature_length": 10,
    "mask_feature_min_masks": 0,
    "mask_feature_prob": 0.0,
    "mask_time_length": 10,
    "mask_time_min_masks": 2,
    "mask_time_prob": 0.05,
    "max_length": 448,
    "max_source_positions": 1500,
    "max_target_positions": 448,
    "median_filter_width": 7,
    "min_length": 0,
    "model_type": "whisper",
    "no_repeat_ngram_size": 0,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 32,
    "num_mel_bins": 128,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 50256,
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "torch_dtype": "bfloat16",
    "torchscript": false,
    "typical_p": 1.0,
    "use_cache": true,
    "use_weighted_layer_sum": false,
    "vocab_size": 51866
  },
  "adapter_type": "mlp",
  "merge_factor": 4,
  "use_rope": true,
  "max_whisper_length": 1500,
  "max_length": 65536,
  "mlp_adapter_act": "gelu",
  "transformers_version": "4.51.3"
}
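The `auto_map` entries route `AutoConfig` and `AutoModelForCausalLM` to custom `configuration_glmasr.py` and `modeling_glmasr.py` files shipped with the checkpoint, so loading requires `trust_remote_code=True`. Below is a minimal loading sketch assuming the standard transformers remote-code flow; the repo id comes from `_name_or_path`, while the dtype and attention kwargs mirror the top-level config values. How the nested `lm_config`/`whisper_config` are exposed (dicts vs. sub-config objects) depends on the custom `GlmasrConfig` class and is not shown here.

```python
# Minimal loading sketch, assuming the standard transformers remote-code flow.
# Repo id is taken from "_name_or_path" in the config; device/dtype handling
# is illustrative, not prescribed by the checkpoint.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo = "zai-org/GLM-ASR-Nano-2512"

# "auto_map" points AutoConfig at configuration_glmasr.GlmasrConfig, so the
# custom code in the repo must be trusted for this call to succeed.
config = AutoConfig.from_pretrained(repo, trust_remote_code=True)
print(config.model_type)  # "glmasr"

model = AutoModelForCausalLM.from_pretrained(
    repo,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,               # matches top-level "torch_dtype"
    attn_implementation="flash_attention_2",  # matches "attn_implementation";
                                              # requires the flash-attn package
)
```

Note that `attn_implementation="flash_attention_2"` only works on supported GPUs with flash-attn installed; falling back to `"sdpa"` or `"eager"` is a reasonable substitute if it is not available.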