Spaces: Runtime error

Upload 6 files

- main.py +8 -2
- requirements.txt +2 -1
main.py
CHANGED

@@ -4,6 +4,7 @@ from txtai.embeddings import Embeddings
 from txtai.pipeline import Extractor
 from llama_cpp import Llama
 
+from huggingface_hub import hf_hub_download
 
 # NOTE - we configure docs_url to serve the interactive Docs at the root path
 # of the app. This way, we can use the docs as a landing page for the app on Spaces.
@@ -16,15 +17,20 @@ app = FastAPI(docs_url="/")
 # Create extractor instance
 #extractor = Extractor(embeddings, "google/flan-t5-base")
 
-pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
+# pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
 
+model_name_or_path = "TheBloke/Llama-2-7B-GGML"
+model_basename = "llama-2-7b.ggmlv3.q4_0.bin"
+model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
+
+llm = Llama(model_path=model_path)
 
 @app.get("/generate")
 def generate(text: str):
     """
     llama2 q4 backend
     """
-    output =
+    output = llm(text)
     return {"output": output[0]["generated_text"]}
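A note on the new handler: the return statement still indexes output[0]["generated_text"], which is the output shape of the transformers pipeline this commit removes. llama_cpp.Llama.__call__ instead returns an OpenAI-style completion dict with the generated text under "choices", so indexing the dict with 0 raises a KeyError at request time, consistent with the Runtime error status shown on the Space. A minimal corrected sketch (the max_tokens value is an illustrative assumption, not part of the commit):

from fastapi import FastAPI
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

app = FastAPI(docs_url="/")

# Download the quantized GGML weights from the Hub, as in the commit
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGML",
    filename="llama-2-7b.ggmlv3.q4_0.bin",
)
llm = Llama(model_path=model_path)

@app.get("/generate")
def generate(text: str):
    """
    llama2 q4 backend
    """
    # Llama.__call__ returns {"choices": [{"text": ...}], ...},
    # not a list of {"generated_text": ...} dicts
    output = llm(text, max_tokens=256)  # max_tokens: illustrative value
    return {"output": output["choices"][0]["text"]}

With that shape, a request such as curl "http://localhost:8000/generate?text=Hello" (uvicorn's default port) returns {"output": "..."}.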
requirements.txt
CHANGED

@@ -2,4 +2,5 @@ fastapi==0.74.*
 requests==2.27.*
 uvicorn[standard]==0.17.*
 sentencepiece==0.1.*
-txtai==6.0.*
+txtai==6.0.*
+llama-cpp-python
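One caution on the unpinned llama-cpp-python line: upstream llama.cpp replaced the GGML file format with GGUF in August 2023, and later llama-cpp-python releases no longer load .ggmlv3 files such as llama-2-7b.ggmlv3.q4_0.bin; 0.1.78 is commonly cited as the last release that does. A pin in the style of the other requirements might look like:

llama-cpp-python==0.1.78

The exact version is an assumption based on the GGML-to-GGUF cutover; any release that still reads ggmlv3 files would work.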