Ephraimmm committed
Commit 60cedc0 · verified · 1 Parent(s): 6d4ced0

Update app.py

Files changed (1)
  app.py +11 -5
app.py CHANGED
@@ -10,15 +10,21 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 # ---------------------
 # Model + Tokenizer
 # ---------------------
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
 MODEL_ID = "Ephraimmm/PIDGIN_gemma-3"
 
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_compute_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
+print("Loading quantized model...")
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    device_map="auto",       # Let HF handle GPU placement
+    torch_dtype="auto",      # Match the quantization dtype
+    trust_remote_code=True   # Required for Unsloth models
 )
 
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+
+
 print("Loading model...")
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
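
For context, the removed block configured 4-bit NF4 quantization via bitsandbytes, but the hunk never shows where bnb_config was consumed. Below is a minimal, self-contained sketch of that loading path, assuming the standard transformers quantization_config= wiring (not visible in the hunk), the bitsandbytes and accelerate packages, and a CUDA device:

# Sketch of the 4-bit bitsandbytes path this commit removes.
# Assumptions: bitsandbytes + accelerate are installed and CUDA is available;
# the quantization_config= keyword is the usual transformers wiring for a
# BitsAndBytesConfig and is an assumption here, not part of the diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID = "Ephraimmm/PIDGIN_gemma-3"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,               # store weights in 4-bit
    bnb_4bit_quant_type="nf4",       # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,  # also quantize the quantization constants
    bnb_4bit_compute_dtype=(
        torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
    ),
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,  # hypothetical wiring; not shown in the hunk
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

The committed replacement drops the explicit quantization config and relies on torch_dtype="auto" plus device_map="auto", so the load follows whatever dtype or quantization metadata the checkpoint itself ships.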