e-hossam96 committed on
Commit
39562f3
·
1 Parent(s): cd4038f

lower expectations for a 2 vCPU instance

Browse files
Files changed (1) hide show
  1. main.py +13 -13
main.py CHANGED
@@ -1,4 +1,4 @@
1
- import os
2
  import uuid
3
  import logging
4
  import asyncio
@@ -20,8 +20,8 @@ from optimum.onnxruntime import ORTModelForSequenceClassification
20
  transformers.set_seed(42)
21
 
22
  MODEL_NAME = "distilroberta-base-climate-sentiment-onnx-quantized"
23
- BATCH_PROCESS_INTERVAL = 0.01
24
- MAX_BATCH_SIZE = 128
25
 
26
  # ----------------------------- #
27
  # Shared Storage #
@@ -52,6 +52,8 @@ def load_classifier(model_name: str):
52
  model = ORTModelForSequenceClassification.from_pretrained(
53
  model_name,
54
  )
 
 
55
  return pipeline(
56
  task="text-classification",
57
  accelerator="ort",
@@ -59,7 +61,7 @@ def load_classifier(model_name: str):
59
  tokenizer=tokenizer,
60
  framework="pt",
61
  batch_size=MAX_BATCH_SIZE,
62
- num_workers=os.cpu_count(),
63
  )
64
 
65
 
@@ -108,13 +110,15 @@ async def process_queue():
108
  async def lifespan(_: FastAPI):
109
  global classifier
110
  classifier = load_classifier(MODEL_NAME)
111
- _ = classifier("Startup warm-up sentence.")
112
  logger.info("Model loaded successfully.")
113
  queue_task = asyncio.create_task(process_queue())
114
  yield
115
  queue_task.cancel()
116
  logger.info("Shutting down the application...")
117
  logger.info("Model unloaded successfully.")
 
 
118
  try:
119
  await queue_task
120
  except asyncio.CancelledError:
@@ -133,18 +137,14 @@ app = FastAPI(lifespan=lifespan)
133
  @app.post("/classify")
134
  async def classify(query: Query):
135
  logger.info(f"{query.sentence}")
136
- query_id = str(uuid.uuid4())
137
  await query_queue.put({"id": query_id, "sentence": query.sentence})
138
 
139
- result = None
140
- while result is None:
141
  async with lock:
142
  if query_id in results:
143
- result = results.pop(query_id)
144
- if result is None:
145
- await asyncio.sleep(0.1)
146
-
147
- return {"id": query_id, "result": result}
148
 
149
 
150
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
1
+ import gc
2
  import uuid
3
  import logging
4
  import asyncio
 
20
  transformers.set_seed(42)
21
 
22
  MODEL_NAME = "distilroberta-base-climate-sentiment-onnx-quantized"
23
+ BATCH_PROCESS_INTERVAL = 0.05
24
+ MAX_BATCH_SIZE = 16
25
 
26
  # ----------------------------- #
27
  # Shared Storage #
 
52
  model = ORTModelForSequenceClassification.from_pretrained(
53
  model_name,
54
  )
55
+
56
+ gc.collect()
57
  return pipeline(
58
  task="text-classification",
59
  accelerator="ort",
 
61
  tokenizer=tokenizer,
62
  framework="pt",
63
  batch_size=MAX_BATCH_SIZE,
64
+ num_workers=1,
65
  )
66
 
67
 
 
110
  async def lifespan(_: FastAPI):
111
  global classifier
112
  classifier = load_classifier(MODEL_NAME)
113
+ _ = classifier("Hi")
114
  logger.info("Model loaded successfully.")
115
  queue_task = asyncio.create_task(process_queue())
116
  yield
117
  queue_task.cancel()
118
  logger.info("Shutting down the application...")
119
  logger.info("Model unloaded successfully.")
120
+ classifier = None
121
+ gc.collect()
122
  try:
123
  await queue_task
124
  except asyncio.CancelledError:
 
137
@app.post("/classify")
async def classify(query: Query):
    """Enqueue a sentence for batched classification and await its result.

    A hex UUID ties the queued item to the entry that the background batch
    worker eventually writes into the shared ``results`` dict. The handler
    polls that dict under the shared lock every 0.1 s until the entry
    appears, then pops and returns it.
    """
    logger.info(f"{query.sentence}")
    request_id = uuid.uuid4().hex
    await query_queue.put({"id": request_id, "sentence": query.sentence})

    # Poll until the batch worker publishes our result; pop under the lock
    # so the entry is consumed exactly once.
    while True:
        async with lock:
            ready = request_id in results
            if ready:
                payload = results.pop(request_id)
                return {"id": request_id, "result": payload}
        await asyncio.sleep(0.1)
 
 
 
148
 
149
 
150
  app.mount("/", StaticFiles(directory="static", html=True), name="static")