# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Import transformer_engine once here, up front; this improves launch speed in debug mode.
from nemo.utils.import_utils import safe_import

safe_import("transformer_engine")
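
# safe_import wraps the import in a try/except, so a missing transformer_engine does
# not break importing this package; once imported, the module is cached in
# sys.modules, which is why importing it here makes later imports cheap
# (assumed behavior of nemo.utils.import_utils.safe_import).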

from nemo.collections.llm import peft
from nemo.collections.llm.bert.data import BERTMockDataModule, BERTPreTrainingDataModule, SpecterDataModule
from nemo.collections.llm.bert.model import (
    BertConfig,
    BertEmbeddingLargeConfig,
    BertEmbeddingMiniConfig,
    BertEmbeddingModel,
    BertModel,
    HuggingFaceBertBaseConfig,
    HuggingFaceBertConfig,
    HuggingFaceBertLargeConfig,
    HuggingFaceBertModel,
    MegatronBertBaseConfig,
    MegatronBertConfig,
    MegatronBertLargeConfig,
)
from nemo.collections.llm.gpt.data import (  # noqa: F401
    AlpacaDataModule,
    ChatDataModule,
    CustomReRankerDataModule,
    CustomRetrievalDataModule,
    DollyDataModule,
    FineTuningDataModule,
    HFDatasetDataModule,
    HFDatasetDataModulePacked,
    HFMockDataModule,
    MockDataModule,
    PreTrainingDataModule,
    SpecterReRankerDataModule,
    SquadDataModule,
)
from nemo.collections.llm.gpt.data.api import dolly, hf_dataset, mock, squad
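
# The lowercase names from gpt.data.api look like convenience factories for the data
# modules imported above (e.g. squad for SquadDataModule); this reading is inferred
# from the naming and not verified here.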
from nemo.collections.llm.gpt.model import (  # noqa: F401
    Baichuan2Config,
    Baichuan2Config7B,
    Baichuan2Model,
    BaseMambaConfig1_3B,
    BaseMambaConfig2_7B,
    BaseMambaConfig130M,
    BaseMambaConfig370M,
    BaseMambaConfig780M,
    ChatGLM2Config6B,
    ChatGLM3Config6B,
    ChatGLMConfig,
    ChatGLMModel,
    CodeGemmaConfig2B,
    CodeGemmaConfig7B,
    CodeLlamaConfig7B,
    CodeLlamaConfig13B,
    CodeLlamaConfig34B,
    CodeLlamaConfig70B,
    DeepSeekModel,
    DeepSeekV2Config,
    DeepSeekV2LiteConfig,
    DeepSeekV3Config,
    Gemma2Config,
    Gemma2Config2B,
    Gemma2Config9B,
    Gemma2Config27B,
    Gemma2Model,
    Gemma3Config1B,
    Gemma3Config4B,
    Gemma3Config12B,
    Gemma3Config27B,
    Gemma3Model,
    GemmaConfig,
    GemmaConfig2B,
    GemmaConfig7B,
    GemmaModel,
    GPTConfig,
    GPTConfig5B,
    GPTConfig7B,
    GPTConfig20B,
    GPTConfig40B,
    GPTConfig126M,
    GPTConfig175B,
    GPTModel,
    GPTOSSConfig,
    GPTOSSConfig20B,
    GPTOSSConfig120B,
    GPTOSSModel,
    Hyena1bConfig,
    Hyena7bARCLongContextConfig,
    Hyena7bConfig,
    Hyena40bARCLongContextConfig,
    Hyena40bConfig,
    HyenaConfig,
    HyenaModel,
    HyenaNV1bConfig,
    HyenaNV7bConfig,
    HyenaNV40bConfig,
    HyenaNVTestConfig,
    HyenaTestConfig,
    Llama2Config7B,
    Llama2Config13B,
    Llama2Config70B,
    Llama3Config8B,
    Llama3Config70B,
    Llama4Config,
    Llama4Experts16Config,
    Llama4Experts128Config,
    Llama31Config8B,
    Llama31Config70B,
    Llama31Config405B,
    Llama31Nemotron70BConfig,
    Llama31NemotronNano8BConfig,
    Llama31NemotronUltra253BConfig,
    Llama32Config1B,
    Llama32Config3B,
    Llama32EmbeddingConfig1B,
    Llama32EmbeddingConfig3B,
    Llama32Reranker1BConfig,
    Llama32Reranker500MConfig,
    Llama33NemotronSuper49BConfig,
    LlamaConfig,
    LlamaEmbeddingModel,
    LlamaModel,
    LlamaNemotronModel,
    MambaModel,
    MaskedTokenLossReduction,
    MistralConfig7B,
    MistralModel,
    MistralNeMoConfig12B,
    MistralSmall3Config24B,
    MixtralConfig,
    MixtralConfig8x3B,
    MixtralConfig8x7B,
    MixtralConfig8x22B,
    MixtralModel,
    Nemotron3Config4B,
    Nemotron3Config8B,
    Nemotron3Config22B,
    Nemotron4Config15B,
    Nemotron4Config340B,
    NemotronConfig,
    NemotronHConfig4B,
    NemotronHConfig8B,
    NemotronHConfig47B,
    NemotronHConfig56B,
    NemotronModel,
    NemotronNano9Bv2,
    NemotronNano12Bv2,
    NVIDIAMambaConfig8B,
    NVIDIAMambaHybridConfig8B,
    Phi3Config,
    Phi3ConfigMini,
    Phi3Model,
    Qwen2Config,
    Qwen2Config1P5B,
    Qwen2Config7B,
    Qwen2Config72B,
    Qwen2Config500M,
    Qwen2Model,
    Qwen3Config,
    Qwen3Config1P7B,
    Qwen3Config4B,
    Qwen3Config8B,
    Qwen3Config14B,
    Qwen3Config30B_A3B,
    Qwen3Config32B,
    Qwen3Config235B_A22B,
    Qwen3Config600M,
    Qwen3Model,
    Qwen25Config1P5B,
    Qwen25Config3B,
    Qwen25Config7B,
    Qwen25Config14B,
    Qwen25Config32B,
    Qwen25Config72B,
    Qwen25Config500M,
    ReRankerModel,
    SSMConfig,
    Starcoder2Config,
    Starcoder2Config3B,
    Starcoder2Config7B,
    Starcoder2Config15B,
    Starcoder2Model,
    StarcoderConfig,
    StarcoderConfig15B,
    StarcoderModel,
    gpt_data_step,
    gpt_forward_step,
)
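
# The T5 data modules below share class names with the GPT data modules imported
# above, so they are bound to T5-prefixed aliases to avoid name clashes.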
from nemo.collections.llm.t5.data import FineTuningDataModule as T5FineTuningDataModule
from nemo.collections.llm.t5.data import MockDataModule as T5MockDataModule
from nemo.collections.llm.t5.data import PreTrainingDataModule as T5PreTrainingDataModule
from nemo.collections.llm.t5.data import SquadDataModule as T5SquadDataModule
from nemo.collections.llm.t5.model import (
    T5Config,
    T5Config3B,
    T5Config11B,
    T5Config220M,
    T5Model,
    t5_data_step,
    t5_forward_step,
)

__all__ = [
    "MockDataModule",
    "T5MockDataModule",
    "CustomRetrievalDataModule",
    "CustomReRankerDataModule",
    "SpecterReRankerDataModule",
    "GPTModel",
    "GPTConfig",
    "HyenaTestConfig",
    "Hyena7bConfig",
    "Hyena40bConfig",
    "Hyena7bARCLongContextConfig",
    "Hyena40bARCLongContextConfig",
    "HyenaNVTestConfig",
    "HyenaNV40bConfig",
    "HyenaNV7bConfig",
    "HyenaConfig",
    "HyenaModel",
    "Hyena1bConfig",
    "HyenaNV1bConfig",
    "gpt_data_step",
    "gpt_forward_step",
    "T5Model",
    "T5Config",
    "T5Config220M",
    "T5Config3B",
    "T5Config11B",
    "BertConfig",
    "BertEmbeddingModel",
    "BertModel",
    "BertEmbeddingLargeConfig",
    "BertEmbeddingMiniConfig",
    "t5_data_step",
    "t5_forward_step",
    "MaskedTokenLossReduction",
    "MistralConfig7B",
    "MistralNeMoConfig12B",
    "MistralSmall3Config24B",
    "MistralModel",
    "MixtralConfig",
    "MixtralConfig8x3B",
    "MixtralConfig8x7B",
    "MixtralConfig8x22B",
    "MixtralModel",
    "Starcoder2Config15B",
    "Starcoder2Config",
    "Starcoder2Model",
    "NemotronModel",
    "Nemotron3Config4B",
    "Nemotron3Config8B",
    "Nemotron3Config22B",
    "Nemotron4Config15B",
    "Nemotron4Config340B",
    "NemotronConfig",
    "LlamaEmbeddingModel",
    "Llama32EmbeddingConfig1B",
    "Llama32EmbeddingConfig3B",
    "Phi3Config",
    "Phi3ConfigMini",
    "Phi3Model",
    "SSMConfig",
    "BaseMambaConfig130M",
    "BaseMambaConfig370M",
    "BaseMambaConfig780M",
    "BaseMambaConfig1_3B",
    "BaseMambaConfig2_7B",
    "NVIDIAMambaConfig8B",
    "NVIDIAMambaHybridConfig8B",
    "NemotronHConfig4B",
    "NemotronHConfig8B",
    "NemotronHConfig47B",
    "NemotronHConfig56B",
    "NemotronNano9Bv2",
    "NemotronNano12Bv2",
    "MambaModel",
    "LlamaConfig",
    "Llama2Config7B",
    "Llama2Config13B",
    "Llama2Config70B",
    "Llama3Config8B",
    "Llama3Config70B",
    "Llama31Config8B",
    "Llama31Config70B",
    "Llama31Config405B",
    "Llama32Config1B",
    "Llama32Config3B",
    "Llama4Experts16Config",
    "Llama4Experts128Config",
    "Llama4Config",
    "Llama31NemotronNano8BConfig",
    "Llama31Nemotron70BConfig",
    "Llama33NemotronSuper49BConfig",
    "Llama31NemotronUltra253BConfig",
    "Llama32Reranker500MConfig",
    "Llama32Reranker1BConfig",
    "CodeLlamaConfig7B",
    "CodeLlamaConfig13B",
    "CodeLlamaConfig34B",
    "CodeLlamaConfig70B",
    "LlamaModel",
    "LlamaNemotronModel",
    "GPTOSSConfig",
    "GPTOSSConfig120B",
    "GPTOSSConfig20B",
    "GPTOSSModel",
    "GemmaConfig",
    "GemmaConfig2B",
    "GemmaConfig7B",
    "CodeGemmaConfig2B",
    "CodeGemmaConfig7B",
    "GemmaModel",
    "Gemma2Model",
    "Gemma2Config9B",
    "Gemma2Config",
    "Gemma2Config27B",
    "Gemma2Config2B",
    "Gemma3Model",
    "Gemma3Config1B",
    "Gemma3Config4B",
    "Gemma3Config12B",
    "Gemma3Config27B",
    "Baichuan2Config",
    "Baichuan2Config7B",
    "Baichuan2Model",
    "ChatGLMConfig",
    "ChatGLM2Config6B",
    "ChatGLM3Config6B",
    "ChatGLMModel",
    "Qwen2Model",
    "Qwen2Config7B",
    "Qwen2Config",
    "Qwen2Config500M",
    "Qwen2Config1P5B",
    "Qwen25Config3B",
    "Qwen2Config72B",
    "Qwen25Config500M",
    "Qwen25Config1P5B",
    "Qwen25Config7B",
    "Qwen25Config14B",
    "Qwen25Config32B",
    "Qwen25Config72B",
    "Qwen3Config",
    "Qwen3Config600M",
    "Qwen3Config1P7B",
    "Qwen3Config4B",
    "Qwen3Config8B",
    "Qwen3Config14B",
    "Qwen3Config32B",
    "Qwen3Config30B_A3B",
    "Qwen3Config235B_A22B",
    "Qwen3Model",
    "PreTrainingDataModule",
    "FineTuningDataModule",
    "ChatDataModule",
    "SquadDataModule",
    "T5PreTrainingDataModule",
    "T5FineTuningDataModule",
    "T5SquadDataModule",
    "DeepSeekModel",
    "DeepSeekV2Config",
    "DeepSeekV2LiteConfig",
    "DeepSeekV3Config",
    "HuggingFaceBertBaseConfig",
    "HuggingFaceBertConfig",
    "HuggingFaceBertLargeConfig",
    "HuggingFaceBertModel",
    "MegatronBertBaseConfig",
    "MegatronBertConfig",
    "MegatronBertLargeConfig",
    "BERTMockDataModule",
    "BERTPreTrainingDataModule",
    "SpecterDataModule",
    "DollyDataModule",
    "tokenizer",
    "mock",
    "squad",
    "dolly",
    "peft",
    "hf_dataset",
    "HFMockDataModule",
]
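
# A minimal usage sketch of the API exported above (illustrative only; assumes a
# working NeMo 2.x environment, and the hyperparameters are hypothetical):
#
#   from nemo.collections.llm import GPTConfig126M, GPTModel, MockDataModule
#
#   model = GPTModel(GPTConfig126M())
#   data = MockDataModule(seq_length=2048, global_batch_size=16)
#
# The training entry points (train, pretrain, finetune, ...) are exported only when
# nemo_run is importable; see the guarded imports below.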

from nemo.utils import logging

try:
    import nemo_run as run  # noqa: F401

    from nemo.collections.llm.api import (  # noqa: F401
        distill,
        export_ckpt,
        finetune,
        generate,
        import_ckpt,
        pretrain,
        prune,
        ptq,
        train,
        validate,
    )
    from nemo.collections.llm.recipes import *  # noqa

    __all__.extend(
        [
            "train",
            "import_ckpt",
            "export_ckpt",
            "pretrain",
            "validate",
            "finetune",
            "generate",
            "prune",
            "ptq",
            "distill",
        ]
    )
except ImportError as error:
    logging.warning(f"Failed to import nemo.collections.llm.[api,recipes]: {error}")
try:
    from nemo.collections.llm.api import deploy  # noqa: F401

    __all__.append("deploy")
except ImportError as error:
    logging.warning(f"The deploy module could not be imported: {error}")

try:
    from nemo.collections.llm.api import evaluate  # noqa: F401

    __all__.append("evaluate")
except ImportError as error:
    logging.warning(f"The evaluate module could not be imported: {error}")

import warnings

warnings.warn(
    "nemo.collections.llm is deprecated and will be removed in a future major NeMo FW container release. "
    "Please refer to the new Megatron-Bridge repository: https://github.com/NVIDIA-NeMo/Megatron-Bridge",
    DeprecationWarning,
    stacklevel=2,
)
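
# stacklevel=2 attributes the warning above to the caller's import line, and
# DeprecationWarning is shown by default only when attributed to code in __main__;
# to surface it unconditionally, run e.g.:
#   python -W always::DeprecationWarning -c "import nemo.collections.llm"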