# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
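
"""Top-level package for the NeMo LLM collection.

Importing this package re-exports model configs, data modules, PEFT helpers,
and, when the optional ``nemo_run`` dependency is available, high-level entry
points such as ``pretrain`` and ``finetune`` (see the guarded imports at the
bottom of this file). Note that this collection is deprecated in favor of
Megatron-Bridge; a deprecation warning is emitted at import time.

A minimal usage sketch, assuming ``nemo_run`` is installed and that the
``llama3_8b`` recipe module is present in this build (the recipe name and
arguments are illustrative, not prescriptive):

    import nemo_run as run
    from nemo.collections import llm

    recipe = llm.llama3_8b.pretrain_recipe(name="demo", num_nodes=1)
    run.run(recipe)
"""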
# Pre-import transformer_engine once here; this improves launch speed when running in debug mode.
from nemo.utils.import_utils import safe_import
safe_import("transformer_engine")
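
# PEFT methods, plus the BERT data modules and model/config exports.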
from nemo.collections.llm import peft
from nemo.collections.llm.bert.data import BERTMockDataModule, BERTPreTrainingDataModule, SpecterDataModule
from nemo.collections.llm.bert.model import (
BertConfig,
BertEmbeddingLargeConfig,
BertEmbeddingMiniConfig,
BertEmbeddingModel,
BertModel,
HuggingFaceBertBaseConfig,
HuggingFaceBertConfig,
HuggingFaceBertLargeConfig,
HuggingFaceBertModel,
MegatronBertBaseConfig,
MegatronBertConfig,
MegatronBertLargeConfig,
)
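
# GPT-family data modules, model configs, and step functions (the bulk of the collection).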
from nemo.collections.llm.gpt.data import ( # noqa: F401
AlpacaDataModule,
ChatDataModule,
CustomReRankerDataModule,
CustomRetrievalDataModule,
DollyDataModule,
FineTuningDataModule,
HFDatasetDataModule,
HFDatasetDataModulePacked,
HFMockDataModule,
MockDataModule,
PreTrainingDataModule,
SpecterReRankerDataModule,
SquadDataModule,
)
from nemo.collections.llm.gpt.data.api import dolly, hf_dataset, mock, squad
from nemo.collections.llm.gpt.model import ( # noqa: F401
Baichuan2Config,
Baichuan2Config7B,
Baichuan2Model,
BaseMambaConfig1_3B,
BaseMambaConfig2_7B,
BaseMambaConfig130M,
BaseMambaConfig370M,
BaseMambaConfig780M,
ChatGLM2Config6B,
ChatGLM3Config6B,
ChatGLMConfig,
ChatGLMModel,
CodeGemmaConfig2B,
CodeGemmaConfig7B,
CodeLlamaConfig7B,
CodeLlamaConfig13B,
CodeLlamaConfig34B,
CodeLlamaConfig70B,
DeepSeekModel,
DeepSeekV2Config,
DeepSeekV2LiteConfig,
DeepSeekV3Config,
Gemma2Config,
Gemma2Config2B,
Gemma2Config9B,
Gemma2Config27B,
Gemma2Model,
Gemma3Config1B,
Gemma3Config4B,
Gemma3Config12B,
Gemma3Config27B,
Gemma3Model,
GemmaConfig,
GemmaConfig2B,
GemmaConfig7B,
GemmaModel,
GPTConfig,
GPTConfig5B,
GPTConfig7B,
GPTConfig20B,
GPTConfig40B,
GPTConfig126M,
GPTConfig175B,
GPTModel,
GPTOSSConfig,
GPTOSSConfig20B,
GPTOSSConfig120B,
GPTOSSModel,
Hyena1bConfig,
Hyena7bARCLongContextConfig,
Hyena7bConfig,
Hyena40bARCLongContextConfig,
Hyena40bConfig,
HyenaConfig,
HyenaModel,
HyenaNV1bConfig,
HyenaNV7bConfig,
HyenaNV40bConfig,
HyenaNVTestConfig,
HyenaTestConfig,
Llama2Config7B,
Llama2Config13B,
Llama2Config70B,
Llama3Config8B,
Llama3Config70B,
Llama4Config,
Llama4Experts16Config,
Llama4Experts128Config,
Llama31Config8B,
Llama31Config70B,
Llama31Config405B,
Llama31Nemotron70BConfig,
Llama31NemotronNano8BConfig,
Llama31NemotronUltra253BConfig,
Llama32Config1B,
Llama32Config3B,
Llama32EmbeddingConfig1B,
Llama32EmbeddingConfig3B,
Llama32Reranker1BConfig,
Llama32Reranker500MConfig,
Llama33NemotronSuper49BConfig,
LlamaConfig,
LlamaEmbeddingModel,
LlamaModel,
LlamaNemotronModel,
MambaModel,
MaskedTokenLossReduction,
MistralConfig7B,
MistralModel,
MistralNeMoConfig12B,
MistralSmall3Config24B,
MixtralConfig,
MixtralConfig8x3B,
MixtralConfig8x7B,
MixtralConfig8x22B,
MixtralModel,
Nemotron3Config4B,
Nemotron3Config8B,
Nemotron3Config22B,
Nemotron4Config15B,
Nemotron4Config340B,
NemotronConfig,
NemotronHConfig4B,
NemotronHConfig8B,
NemotronHConfig47B,
NemotronHConfig56B,
NemotronModel,
NemotronNano9Bv2,
NemotronNano12Bv2,
NVIDIAMambaConfig8B,
NVIDIAMambaHybridConfig8B,
Phi3Config,
Phi3ConfigMini,
Phi3Model,
Qwen2Config,
Qwen2Config1P5B,
Qwen2Config7B,
Qwen2Config72B,
Qwen2Config500M,
Qwen2Model,
Qwen3Config,
Qwen3Config1P7B,
Qwen3Config4B,
Qwen3Config8B,
Qwen3Config14B,
Qwen3Config30B_A3B,
Qwen3Config32B,
Qwen3Config235B_A22B,
Qwen3Config600M,
Qwen3Model,
Qwen25Config1P5B,
Qwen25Config3B,
Qwen25Config7B,
Qwen25Config14B,
Qwen25Config32B,
Qwen25Config72B,
Qwen25Config500M,
ReRankerModel,
SSMConfig,
Starcoder2Config,
Starcoder2Config3B,
Starcoder2Config7B,
Starcoder2Config15B,
Starcoder2Model,
StarcoderConfig,
StarcoderConfig15B,
StarcoderModel,
gpt_data_step,
gpt_forward_step,
)
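
# T5 data modules are imported under aliases to avoid clashing with the GPT
# data modules of the same names above.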
from nemo.collections.llm.t5.data import FineTuningDataModule as T5FineTuningDataModule
from nemo.collections.llm.t5.data import MockDataModule as T5MockDataModule
from nemo.collections.llm.t5.data import PreTrainingDataModule as T5PreTrainingDataModule
from nemo.collections.llm.t5.data import SquadDataModule as T5SquadDataModule
from nemo.collections.llm.t5.model import (
T5Config,
T5Config3B,
T5Config11B,
T5Config220M,
T5Model,
t5_data_step,
t5_forward_step,
)
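
# Static public API; extended further below when the optional entry points
# (API functions, recipes, deploy, evaluate) import successfully.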
__all__ = [
"MockDataModule",
"T5MockDataModule",
"CustomRetrievalDataModule",
"CustomReRankerDataModule",
"SpecterReRankerDataModule",
"GPTModel",
"GPTConfig",
"HyenaTestConfig",
"Hyena7bConfig",
"Hyena40bConfig",
"Hyena7bARCLongContextConfig",
"Hyena40bARCLongContextConfig",
"HyenaNVTestConfig",
"HyenaNV40bConfig",
"HyenaNV7bConfig",
"HyenaConfig",
"HyenaModel",
"Hyena1bConfig",
"HyenaNV1bConfig",
"gpt_data_step",
"gpt_forward_step",
"T5Model",
"T5Config",
"T5Config220M",
"T5Config3B",
"T5Config11B",
"BertConfig",
"BertEmbeddingModel",
"BertModel",
"BertEmbeddingLargeConfig",
"BertEmbeddingMiniConfig",
"t5_data_step",
"t5_forward_step",
"MaskedTokenLossReduction",
"MistralConfig7B",
"MistralNeMoConfig12B",
"MistralSmall3Config24B",
"MistralModel",
"MixtralConfig",
"MixtralConfig8x3B",
"MixtralConfig8x7B",
"MixtralConfig8x22B",
"MixtralModel",
"Starcoder2Config15B",
"Starcoder2Config",
"Starcoder2Model",
"NemotronModel",
"Nemotron3Config4B",
"Nemotron3Config8B",
"Nemotron3Config22B",
"Nemotron4Config15B",
"Nemotron4Config340B",
"NemotronConfig",
"LlamaEmbeddingModel",
"Llama32EmbeddingConfig1B",
"Llama32EmbeddingConfig3B",
"Phi3Config",
"Phi3ConfigMini",
"Phi3Model",
"SSMConfig",
"BaseMambaConfig130M",
"BaseMambaConfig370M",
"BaseMambaConfig780M",
"BaseMambaConfig1_3B",
"BaseMambaConfig2_7B",
"NVIDIAMambaConfig8B",
"NVIDIAMambaHybridConfig8B",
"NemotronHConfig4B",
"NemotronHConfig8B",
"NemotronHConfig47B",
"NemotronHConfig56B",
"NemotronNano9Bv2",
"NemotronNano12Bv2",
"MambaModel",
"LlamaConfig",
"Llama2Config7B",
"Llama2Config13B",
"Llama2Config70B",
"Llama3Config8B",
"Llama3Config70B",
"Llama31Config8B",
"Llama31Config70B",
"Llama31Config405B",
"Llama32Config1B",
"Llama32Config3B",
"Llama4Experts16Config",
"Llama4Experts128Config",
"Llama4Config",
"Llama31NemotronNano8BConfig",
"Llama31Nemotron70BConfig",
"Llama33NemotronSuper49BConfig",
"Llama31NemotronUltra253BConfig",
"Llama32Reranker500MConfig",
"Llama32Reranker1BConfig",
"CodeLlamaConfig7B",
"CodeLlamaConfig13B",
"CodeLlamaConfig34B",
"CodeLlamaConfig70B",
"LlamaModel",
"LlamaNemotronModel",
"GPTOSSConfig",
"GPTOSSConfig120B",
"GPTOSSConfig20B",
"GPTOSSModel",
"GemmaConfig",
"GemmaConfig2B",
"GemmaConfig7B",
"CodeGemmaConfig2B",
"CodeGemmaConfig7B",
"GemmaModel",
"Gemma2Model",
"Gemma2Config9B",
"Gemma2Config",
"Gemma2Config27B",
"Gemma2Config2B",
"Gemma3Model",
"Gemma3Config1B",
"Gemma3Config4B",
"Gemma3Config12B",
"Gemma3Config27B",
"Baichuan2Config",
"Baichuan2Config7B",
"Baichuan2Model",
"ChatGLMConfig",
"ChatGLM2Config6B",
"ChatGLM3Config6B",
"ChatGLMModel",
"Qwen2Model",
"Qwen2Config7B",
"Qwen2Config",
"Qwen2Config500M",
"Qwen2Config1P5B",
"Qwen25Config3B",
"Qwen2Config72B",
"Qwen25Config500M",
"Qwen25Config1P5B",
"Qwen25Config7B",
"Qwen25Config14B",
"Qwen25Config32B",
"Qwen25Config72B",
"Qwen3Config",
"Qwen3Config600M",
"Qwen3Config1P7B",
"Qwen3Config4B",
"Qwen3Config8B",
"Qwen3Config14B",
"Qwen3Config32B",
"Qwen3Config30B_A3B",
"Qwen3Config235B_A22B",
"Qwen3Model",
"PreTrainingDataModule",
"FineTuningDataModule",
"ChatDataModule",
"SquadDataModule",
"T5PreTrainingDataModule",
"T5FineTuningDataModule",
"T5SquadDataModule",
"T5MockDataModule",
"DeepSeekModel",
"DeepSeekV2Config",
"DeepSeekV2LiteConfig",
"DeepSeekV3Config",
"HuggingFaceBertBaseConfig",
"HuggingFaceBertConfig",
"HuggingFaceBertLargeConfig",
"HuggingFaceBertModel",
"MegatronBertBaseConfig",
"MegatronBertConfig",
"MegatronBertLargeConfig",
"BERTMockDataModule",
"BERTPreTrainingDataModule",
"SpecterDataModule",
"DollyDataModule",
"tokenizer",
"mock",
"squad",
"dolly",
"peft",
"hf_dataset",
"HFMockDataModule",
]
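
# Optional entry points: the nemo_run-backed API functions and the recipe
# collection are only exported when their dependencies can be imported.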
from nemo.utils import logging
try:
import nemo_run as run # noqa: F401
from nemo.collections.llm.api import ( # noqa: F401
distill,
export_ckpt,
finetune,
generate,
import_ckpt,
pretrain,
prune,
ptq,
train,
validate,
)
from nemo.collections.llm.recipes import * # noqa
__all__.extend(
[
"train",
"import_ckpt",
"export_ckpt",
"pretrain",
"validate",
"finetune",
"generate",
"prune",
"ptq",
"distill",
]
)
except ImportError as error:
logging.warning(f"Failed to import nemo.collections.llm.[api,recipes]: {error}")
try:
from nemo.collections.llm.api import deploy # noqa: F401
__all__.append("deploy")
except ImportError as error:
logging.warning(f"The deploy module could not be imported: {error}")
try:
from nemo.collections.llm.api import evaluate # noqa: F401
__all__.append("evaluate")
except ImportError as error:
logging.warning(f"The evaluate module could not be imported: {error}")
import warnings
warnings.warn(
"nemo.collections.llm is deprecated and will be removed in a future major NeMo FW container release. "
"Please refer to the new Megatron-Bridge repository: https://github.com/NVIDIA-NeMo/Megatron-Bridge",
DeprecationWarning,
stacklevel=2,
)