Spaces:

AISimplyExplained
/

deberta_api

Sleeping

App Files Files Community

deberta_api / main.py

AISimplyExplained

Update main.py

99351b6 verified over 1 year ago

raw

history blame

2.78 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
	import torch
	from detoxify import Detoxify



	class Guardrail:
	def __init__(self):
	tokenizer = AutoTokenizer.from_pretrained("ProtectAI/deberta-v3-base-prompt-injection")
	model = AutoModelForSequenceClassification.from_pretrained("ProtectAI/deberta-v3-base-prompt-injection")

	self.classifier = pipeline(
	"text-classification",
	model=model,
	tokenizer=tokenizer,
	truncation=True,
	max_length=512,
	device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
	)

	def guard(self, prompt):
	return self.classifier(prompt)

	def determine_level(self, label, score):
	if label == "SAFE":
	return 0, "safe"
	else:
	if score > 0.9:
	return 4, "high"
	elif score > 0.75:
	return 3, "medium"
	elif score > 0.5:
	return 2, "low"
	else:
	return 1, "very low"


	class TextPrompt(BaseModel):
	prompt: str


	class ClassificationResult(BaseModel):
	label: str
	score: float
	level: int
	severity_label: str


	app = FastAPI()
	guardrail = Guardrail()
	toxicity_classifier = Detoxify('original')

	class ToxicityResult(BaseModel):
	toxicity: float
	severe_toxicity: float
	obscene: float
	threat: float
	insult: float
	identity_attack: float

	@app.post("/api/models/toxicity/classify", response_model=ToxicityResult)
	def classify_toxicity(text_prompt: TextPrompt):
	try:
	result = toxicity_classifier.predict(text_prompt.prompt)
	return {
	"toxicity": result['toxicity'],
	"severe_toxicity": result['severe_toxicity'],
	"obscene": result['obscene'],
	"threat": result['threat'],
	"insult": result['insult'],
	"identity_attack": result['identity_attack']
	}
	except Exception as e:
	raise HTTPException(status_code=500, detail=str(e))


	@app.post("/api/models/PromptInjection/classify", response_model=ClassificationResult)
	def classify_text(text_prompt: TextPrompt):
	try:
	result = guardrail.guard(text_prompt.prompt)
	label = result[0]['label']
	score = result[0]['score']
	level, severity_label = guardrail.determine_level(label, score)
	return {"label": label, "score": score, "level": level, "severity_label": severity_label}
	except Exception as e:
	raise HTTPException(status_code=500, detail=str(e))


	if __name__ == "__main__":
	import uvicorn

	uvicorn.run(app, host="0.0.0.0", port=8000)