Spaces:
Paused
Paused
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -95,6 +95,34 @@ def load_tokenizer_and_model(base_model,load_8bit=False):
|
|
| 95 |
model.eval()
|
| 96 |
return tokenizer,model,device
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
# Greedy Search
|
| 99 |
def greedy_search(input_ids: torch.Tensor,
|
| 100 |
model: torch.nn.Module,
|
|
|
|
| 95 |
model.eval()
|
| 96 |
return tokenizer,model,device
|
| 97 |
|
| 98 |
+
|
| 99 |
+
def load_tokenizer_and_model_gpt2(base_model, load_8bit=False):
    """Load a GPT-2 tokenizer and LM-head model from *base_model*.

    Args:
        base_model: model name or local path forwarded to ``from_pretrained``.
        load_8bit: if True, load the weights in 8-bit precision
            (CUDA path only; ignored on CPU).

    Returns:
        Tuple ``(tokenizer, model, device)`` where ``device`` is the string
        ``"cuda"`` or ``"cpu"`` the model was placed on. The model is put
        in eval mode before being returned.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # NOTE(review): GPT2Tokenizer is the slow tokenizer, so ``use_fast`` is
    # silently ignored; kept for call-site parity with the original code.
    tokenizer = GPT2Tokenizer.from_pretrained(base_model, use_fast=True)

    if device == "cuda":
        model = GPT2LMHeadModel.from_pretrained(
            base_model,
            load_in_8bit=load_8bit,
            torch_dtype=torch.float16,
            device_map="auto",
        )
    else:
        # Fixed: this branch previously used AutoModelForCausalLM
        # (copy-pasted from a different loader) while the CUDA branch used
        # GPT2LMHeadModel; both branches now construct the same class.
        # ``load_8bit`` is still ignored on CPU (bitsandbytes needs CUDA).
        model = GPT2LMHeadModel.from_pretrained(
            base_model, device_map={"": device}, low_cpu_mem_usage=True
        )

    # if not load_8bit:
    #     model.half()  # seems to fix bugs for some users.

    model.eval()
    return tokenizer, model, device
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
|
| 126 |
# Greedy Search
|
| 127 |
def greedy_search(input_ids: torch.Tensor,
|
| 128 |
model: torch.nn.Module,
|