ffreemt committed
Commit · 1849912
1 Parent(s): 96b6f2f

update RedirectResponse
m3_server.py  CHANGED  (+6 -2)

@@ -11,6 +11,7 @@ from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import JSONResponse
 from FlagEmbedding import BGEM3FlagModel
 from pydantic import BaseModel
+from starlette.responses import RedirectResponse
 from starlette.status import HTTP_504_GATEWAY_TIMEOUT
 
 _ = """
@@ -27,7 +28,9 @@ max_q_length = 256 # max context lenght for questions in re-ranker
 request_flush_timeout = 0.1 # flush time out for future improvements on api calls / gpu batches (for now is pretty basic)
 rerank_weights = [0.4, 0.2, 0.4] # re-rank score weights
 request_time_out = 30 # Timeout threshold
+request_time_out = 1200 # Timeout threshold
 gpu_time_out = 5 # gpu processing timeout threshold
+gpu_time_out = 600 # gpu processing timeout threshold
 port = 3000
 port = 7860
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -174,7 +177,7 @@ class RequestProcessor:
             await self.queue.put((request_data, request_type, request_id))
             return await self.response_futures[request_id]
         except Exception as e:
-            raise HTTPException(status_code=500, detail=f"Internal Server Error {e}")
+            raise HTTPException(status_code=500, detail=f"Internal Server Error: {e}")
 
 
 description = dedent(
@@ -232,7 +235,8 @@ async def timeout_middleware(request: Request, call_next):
 @app.get("/")
 async def landing():
     """Define landing page."""
-    return "Swagger UI at https://mikeee-baai-m3.hf.space/docs"
+    # return "Swagger UI at https://mikeee-baai-m3.hf.space/docs"
+    return RedirectResponse("/docs")
 
 
 @app.post("/embed/", response_model=EmbedResponse)
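
The landing-page change can be reproduced in isolation. Below is a minimal sketch (the app and handler here are illustrative, not lifted from m3_server.py): a GET route on "/" that returns Starlette's RedirectResponse, which defaults to a 307 temporary redirect, so opening the Space root lands on FastAPI's auto-generated Swagger UI at /docs.

# Minimal sketch of the new landing behaviour; names here are illustrative.
from fastapi import FastAPI
from starlette.responses import RedirectResponse

app = FastAPI()


@app.get("/")
async def landing():
    """Send visitors from the root path to the auto-generated Swagger UI."""
    # RedirectResponse defaults to HTTP 307 (temporary redirect).
    return RedirectResponse("/docs")

A GET on / now answers with a 307 and a Location: /docs header instead of the plain string the old handler returned, so browsers are taken straight to the interactive API docs.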
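
The commit also raises the timeout constants (request_time_out from 30 to 1200 seconds, gpu_time_out from 5 to 600). The last hunk header shows that m3_server.py defines an async timeout_middleware; a common way to implement such a request-level timeout, shown here only as a hedged sketch under the assumption that the middleware wraps call_next, is asyncio.wait_for plus a 504 response.

# Hedged sketch of a request-timeout middleware; not the actual body of m3_server.py.
import asyncio

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from starlette.status import HTTP_504_GATEWAY_TIMEOUT

request_time_out = 1200  # Timeout threshold (the value this commit raises it to)

app = FastAPI()


@app.middleware("http")
async def timeout_middleware(request: Request, call_next):
    """Abort requests that run longer than request_time_out and reply with a 504."""
    try:
        return await asyncio.wait_for(call_next(request), timeout=request_time_out)
    except asyncio.TimeoutError:
        return JSONResponse(
            {"detail": "Request processing exceeded the timeout threshold"},
            status_code=HTTP_504_GATEWAY_TIMEOUT,
        )

Under this pattern, the earlier 30-second budget would cut long embedding or re-rank requests off with a 504; the larger constants presumably give queued GPU work more room before the middleware fires.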