# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Implemented following the guide at https://www.promptingguide.ai/models/gemma#gemma-7b-prompt-format
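
For reference, a two-turn dialog rendered with the TEMPLATE below looks like this
(the user/assistant messages are illustrative placeholders):

    <start_of_turn>user
    Hello<end_of_turn>
    <start_of_turn>model
    Hi there!<end_of_turn>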
"""
from lhotse.cut import Cut, MixedCut

from nemo.collections.common.data.prompt_fn import registered_prompt_format_fn
from nemo.collections.common.prompts.formatter import Modality, PromptFormatter

GEMMA_BOS = "<start_of_turn>"
GEMMA_END_OF_TURN = "<end_of_turn>"
GEMMA_NL = "\n\n"


class GemmaPromptFormatter(PromptFormatter):
    NAME = "gemma"
    OUTPUT_ROLE = "assistant"
    INSERT_BOS = True
    INSERT_EOS = True
    TEMPLATE = {
        "user": {
            "template": f"{GEMMA_BOS}user\n|message|{GEMMA_END_OF_TURN}\n{GEMMA_BOS}model\n",
            "slots": {
                "message": Modality.Text,
            },
        },
        OUTPUT_ROLE: {
            # Note: that trailing NL is bothering me.
            "template": f"|message|{GEMMA_END_OF_TURN}\n",
            "slots": {
                "message": Modality.Text,
            },
        },
    }
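

# A minimal usage sketch, assuming `tokenizer` is an already-built NeMo tokenizer and that
# the PromptFormatter constructor signature matches your NeMo version; `encode_dialog` is
# the same entry point used by `gemma1` below:
#
#   formatter = GemmaPromptFormatter(tokenizer)
#   encoded = formatter.encode_dialog(
#       [
#           {"role": "user", "slots": {"message": "Transcribe the audio."}},
#           {"role": "assistant", "slots": {"message": "hello world"}},
#       ]
#   )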


@registered_prompt_format_fn(Cut, GemmaPromptFormatter)
def gemma1(cut: Cut, prompt: GemmaPromptFormatter):
    # For mixed (e.g. padded) cuts, read metadata from the first real (non-padding) cut.
    if isinstance(cut, MixedCut):
        cut = cut.first_non_padding_cut
    # The user turn comes from the cut's custom "context" field, falling back to
    # "question" and then to the default context.
    if cut.has_custom("context"):
        context = cut.context
    elif cut.has_custom("question"):
        context = cut.question
    else:
        context = cut.default_context
    turns = [{"role": "user", "slots": {"message": context}}]
    # If the supervision carries a transcript/answer, add it as the assistant turn.
    if (answer := cut.supervisions[0].text) is not None:
        turns.append({"role": "assistant", "slots": {"message": answer}})
    return prompt.encode_dialog(turns)
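

# The `@registered_prompt_format_fn(Cut, GemmaPromptFormatter)` registration above is what
# lets NeMo's Lhotse-based dataloading find this function for (Cut, GemmaPromptFormatter)
# pairs. The sketch below is illustrative only; `cut` and `formatter` are assumed to exist,
# e.g. `formatter` built as in the earlier sketch:
#
#   encoded = gemma1(cut, formatter)
#   # user turn      <- cut.context / cut.question / cut.default_context
#   # assistant turn <- cut.supervisions[0].text (when present)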