Spaces: Runtime error

Upload 6 files

- main.py +8 -2
- requirements.txt +2 -1
main.py
CHANGED

@@ -4,6 +4,7 @@ from txtai.embeddings import Embeddings
 from txtai.pipeline import Extractor
 from llama_cpp import Llama
 
+from huggingface_hub import hf_hub_download
 
 # NOTE - we configure docs_url to serve the interactive Docs at the root path
 # of the app. This way, we can use the docs as a landing page for the app on Spaces.
@@ -16,15 +17,20 @@ app = FastAPI(docs_url="/")
 # Create extractor instance
 #extractor = Extractor(embeddings, "google/flan-t5-base")
 
-pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
+# pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
 
+model_name_or_path = "TheBloke/Llama-2-7B-GGML"
+model_basename = "llama-2-7b.ggmlv3.q4_0.bin"
+model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
+
+llm = Llama(model_path=model_path)
 
 @app.get("/generate")
 def generate(text: str):
     """
     llama2 q4 backend
     """
-    output =
+    output = llm(text)
     return {"output": output[0]["generated_text"]}
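A note on the new handler: the return statement still indexes output[0]["generated_text"], which is the output shape of the transformers pipeline this commit removes. llama_cpp.Llama.__call__ instead returns an OpenAI-style completion dict with the generated text under "choices", so indexing the dict with 0 raises a KeyError at request time, consistent with the Runtime error status shown on the Space. A minimal corrected sketch (the max_tokens value is an illustrative assumption, not part of the commit):

from fastapi import FastAPI
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = FastAPI(docs_url="/")

# Download the quantized GGML weights from the Hub, as in the commit
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGML",
    filename="llama-2-7b.ggmlv3.q4_0.bin",
)
llm = Llama(model_path=model_path)

@app.get("/generate")
def generate(text: str):
    """
    llama2 q4 backend
    """
    # Llama.__call__ returns {"choices": [{"text": ...}], ...},
    # not a list of {"generated_text": ...} dicts
    output = llm(text, max_tokens=256)  # max_tokens: illustrative value
    return {"output": output["choices"][0]["text"]}

With that shape, a request such as curl "http://localhost:8000/generate?text=Hello" (uvicorn's default port) returns {"output": "..."}.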
requirements.txt
CHANGED

@@ -2,4 +2,5 @@ fastapi==0.74.*
 requests==2.27.*
 uvicorn[standard]==0.17.*
 sentencepiece==0.1.*
-txtai==6.0.*
+txtai==6.0.*
+llama-cpp-python
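One caution on the unpinned llama-cpp-python line: upstream llama.cpp replaced the GGML file format with GGUF in August 2023, and later llama-cpp-python releases no longer load .ggmlv3 files such as llama-2-7b.ggmlv3.q4_0.bin; 0.1.78 is commonly cited as the last release that does. A pin in the style of the other requirements might look like:

llama-cpp-python==0.1.78

The exact version is an assumption based on the GGML-to-GGUF cutover; any release that still reads ggmlv3 files would work.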