Spaces:

prathamesh788
/

pravaah

Sleeping

pravaah / classifier.py

Prathamesh Sutar

Deployment optimized

f0663fb about 1 month ago

2.53 kB

	from transformers import pipeline
	from scraper import fetch_hazard_tweets
	from translate import translate_to_english
	from sentiment import classify_emotion_text
	from ner import extract_hazard_and_locations
	import json

	# Hugging Face model identifier handed to the zero-shot pipeline in get_classifier().
	model_name = "cross-encoder/nli-deberta-v3-base"

	# Cached pipeline instance. It starts as None and is filled in by the first
	# get_classifier() call, so importing this module does not load the model.
	classifier = None

	def get_classifier():
	    """
	    Return the shared zero-shot classification pipeline.

	    The pipeline is built on the first call and stored in the module-level
	    `classifier` variable; later calls return that same object, which keeps
	    model loading out of module import time.
	    """
	    global classifier
	    if classifier is not None:
	        # Already built by an earlier call.
	        return classifier

	    classifier = pipeline(
	        "zero-shot-classification",
	        model=model_name,
	        framework="pt",
	    )
	    return classifier

	def classify_with_model(tweet_text, *, threshold=0.75):
	    """
	    Classify a tweet as an ocean-hazard report via zero-shot classification.

	    The text is scored against two candidate labels by the pipeline returned
	    from get_classifier(). The tweet counts as hazardous only when the hazard
	    label ranks first AND its score is strictly greater than `threshold`.

	    Args:
	        tweet_text: Text to classify. None, empty, and whitespace-only values
	            are reported as non-hazardous without invoking the model.
	        threshold: Score the winning hazard label must exceed (exclusive).
	            Keyword-only; defaults to 0.75.

	    Returns:
	        1 if hazardous, else 0.
	    """
	    # Nothing to classify: skip the model call entirely.
	    if not tweet_text or not tweet_text.strip():
	        return 0

	    # Single definition of the positive label so the candidate list and the
	    # comparison below cannot drift apart.
	    hazard_label = "report of an ocean hazard"
	    candidate_labels = [hazard_label, "not an ocean hazard"]

	    result = get_classifier()(tweet_text, candidate_labels)

	    # Index 0 holds the best-scoring label and its score.
	    top_label = result['labels'][0]
	    top_score = result['scores'][0]
	    return 1 if (top_label == hazard_label and top_score > threshold) else 0

	def _enrich_tweet(tweet):
	    """Return a copy of one tweet dict with translation, hazard flag, sentiment and NER added."""
	    raw_text = tweet.get('text', '')
	    enriched = dict(tweet)  # shallow copy; the caller's dict is not modified

	    # Translate first so every later stage works on the English text.
	    english = translate_to_english(raw_text)
	    enriched['translated_text'] = english

	    is_hazard = classify_with_model(english)
	    enriched['hazardous'] = is_hazard

	    if is_hazard != 1:
	        # Non-hazardous tweets skip the sentiment/NER calls and receive
	        # fixed placeholder values instead.
	        enriched['sentiment'] = {"label": "neutral", "score": 0.0}
	        enriched['ner'] = {"hazards": [], "locations": []}
	        return enriched

	    # Hazardous tweets get the full analysis.
	    enriched['sentiment'] = classify_emotion_text(english)
	    enriched['ner'] = extract_hazard_and_locations(english)
	    return enriched

	def classify_tweets(tweets):
	    """
	    Enrich a list of tweet dicts (each read via its 'text' field).

	    Per tweet: translate to English, classify the translation as hazardous
	    or not, then run sentiment and NER for hazardous tweets only; the rest
	    receive neutral/empty placeholders.

	    Returns a new list of copied dicts carrying the extra keys
	    'translated_text', 'hazardous', 'sentiment' and 'ner'.
	    """
	    return [_enrich_tweet(tweet) for tweet in tweets]

	if __name__ == "__main__":
	    # Script entry point: fetch up to 20 tweets, run them through the
	    # classification pipeline and print the enriched records as indented JSON
	    # (ensure_ascii=False keeps non-ASCII text readable).
	    results = classify_tweets(fetch_hazard_tweets(limit=20))
	    print(json.dumps(results, ensure_ascii=False, indent=2))