# Source: uhhjj / app / huggingface_models.py (uploaded by Speedofmastery)
# Commit d94d354: Deploy OpenManus Complete AI Platform - 200+ Models + Mobile Auth + Cloudflare Services
"""
Hugging Face Models Integration for OpenManus AI Agent
Comprehensive integration with Hugging Face Inference API for all model categories
"""
import asyncio
import base64
import io
import json
import logging
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Union
import aiohttp
import PIL.Image
from pydantic import BaseModel
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class ModelCategory(Enum):
    """Task categories used to tag Hugging Face model entries.

    Each member's value is a lowercase, hyphen-separated string, so a
    member can be looked up from its string form with
    ``ModelCategory("text-generation")``. Members are grouped by theme
    below; iteration order follows declaration order.
    """

    # --- Core AI tasks ---
    TEXT_GENERATION = "text-generation"
    TEXT_TO_IMAGE = "text-to-image"
    IMAGE_TO_TEXT = "image-to-text"
    AUTOMATIC_SPEECH_RECOGNITION = "automatic-speech-recognition"
    TEXT_TO_SPEECH = "text-to-speech"
    IMAGE_CLASSIFICATION = "image-classification"
    OBJECT_DETECTION = "object-detection"
    FEATURE_EXTRACTION = "feature-extraction"
    SENTENCE_SIMILARITY = "sentence-similarity"
    TRANSLATION = "translation"
    SUMMARIZATION = "summarization"
    QUESTION_ANSWERING = "question-answering"
    FILL_MASK = "fill-mask"
    TOKEN_CLASSIFICATION = "token-classification"
    ZERO_SHOT_CLASSIFICATION = "zero-shot-classification"
    AUDIO_CLASSIFICATION = "audio-classification"
    CONVERSATIONAL = "conversational"

    # --- Video and motion ---
    TEXT_TO_VIDEO = "text-to-video"
    VIDEO_TO_TEXT = "video-to-text"
    VIDEO_CLASSIFICATION = "video-classification"
    VIDEO_GENERATION = "video-generation"
    MOTION_GENERATION = "motion-generation"
    DEEPFAKE_DETECTION = "deepfake-detection"

    # --- Code and software development ---
    CODE_GENERATION = "code-generation"
    CODE_COMPLETION = "code-completion"
    CODE_EXPLANATION = "code-explanation"
    CODE_TRANSLATION = "code-translation"
    CODE_REVIEW = "code-review"
    APP_GENERATION = "app-generation"
    API_GENERATION = "api-generation"
    DATABASE_GENERATION = "database-generation"

    # --- 3D and AR/VR ---
    TEXT_TO_3D = "text-to-3d"
    IMAGE_TO_3D = "image-to-3d"
    THREE_D_GENERATION = "3d-generation"
    MESH_GENERATION = "mesh-generation"
    TEXTURE_GENERATION = "texture-generation"
    AR_CONTENT = "ar-content"
    VR_ENVIRONMENT = "vr-environment"

    # --- Document processing ---
    OCR = "ocr"
    DOCUMENT_ANALYSIS = "document-analysis"
    PDF_PROCESSING = "pdf-processing"
    LAYOUT_ANALYSIS = "layout-analysis"
    TABLE_EXTRACTION = "table-extraction"
    HANDWRITING_RECOGNITION = "handwriting-recognition"
    FORM_PROCESSING = "form-processing"

    # --- Multimodal AI ---
    VISION_LANGUAGE = "vision-language"
    MULTIMODAL_REASONING = "multimodal-reasoning"
    CROSS_MODAL_GENERATION = "cross-modal-generation"
    VISUAL_QUESTION_ANSWERING = "visual-question-answering"
    IMAGE_TEXT_MATCHING = "image-text-matching"
    MULTIMODAL_CHAT = "multimodal-chat"

    # --- Specialized AI ---
    MUSIC_GENERATION = "music-generation"
    VOICE_CLONING = "voice-cloning"
    STYLE_TRANSFER = "style-transfer"
    SUPER_RESOLUTION = "super-resolution"
    IMAGE_INPAINTING = "image-inpainting"
    IMAGE_OUTPAINTING = "image-outpainting"
    BACKGROUND_REMOVAL = "background-removal"
    FACE_RESTORATION = "face-restoration"

    # --- Content creation ---
    CREATIVE_WRITING = "creative-writing"
    STORY_GENERATION = "story-generation"
    SCREENPLAY_WRITING = "screenplay-writing"
    POETRY_GENERATION = "poetry-generation"
    BLOG_WRITING = "blog-writing"
    MARKETING_COPY = "marketing-copy"

    # --- Game development ---
    GAME_ASSET_GENERATION = "game-asset-generation"
    CHARACTER_GENERATION = "character-generation"
    LEVEL_GENERATION = "level-generation"
    DIALOGUE_GENERATION = "dialogue-generation"

    # --- Science and research ---
    PROTEIN_FOLDING = "protein-folding"
    MOLECULE_GENERATION = "molecule-generation"
    SCIENTIFIC_WRITING = "scientific-writing"
    RESEARCH_ASSISTANCE = "research-assistance"
    DATA_ANALYSIS = "data-analysis"

    # --- Business and productivity ---
    EMAIL_GENERATION = "email-generation"
    PRESENTATION_CREATION = "presentation-creation"
    REPORT_GENERATION = "report-generation"
    MEETING_SUMMARIZATION = "meeting-summarization"
    PROJECT_PLANNING = "project-planning"

    # --- AI teacher and education ---
    AI_TUTORING = "ai-tutoring"
    EDUCATIONAL_CONTENT = "educational-content"
    LESSON_PLANNING = "lesson-planning"
    CONCEPT_EXPLANATION = "concept-explanation"
    HOMEWORK_ASSISTANCE = "homework-assistance"
    QUIZ_GENERATION = "quiz-generation"
    CURRICULUM_DESIGN = "curriculum-design"
    LEARNING_ASSESSMENT = "learning-assessment"
    ADAPTIVE_LEARNING = "adaptive-learning"
    SUBJECT_TEACHING = "subject-teaching"
    MATH_TUTORING = "math-tutoring"
    SCIENCE_TUTORING = "science-tutoring"
    LANGUAGE_TUTORING = "language-tutoring"
    HISTORY_TUTORING = "history-tutoring"
    CODING_INSTRUCTION = "coding-instruction"
    EXAM_PREPARATION = "exam-preparation"
    STUDY_GUIDE_CREATION = "study-guide-creation"
    EDUCATIONAL_GAMES = "educational-games"
    LEARNING_ANALYTICS = "learning-analytics"
    PERSONALIZED_LEARNING = "personalized-learning"

    # --- Advanced image processing and manipulation ---
    IMAGE_EDITING = "image-editing"
    FACE_SWAP = "face-swap"
    FACE_ENHANCEMENT = "face-enhancement"
    FACE_GENERATION = "face-generation"
    PORTRAIT_EDITING = "portrait-editing"
    PHOTO_RESTORATION = "photo-restoration"
    IMAGE_UPSCALING = "image-upscaling"
    COLOR_CORRECTION = "color-correction"
    ARTISTIC_FILTER = "artistic-filter"

    # --- Advanced speech and audio ---
    ADVANCED_TTS = "advanced-tts"
    ADVANCED_STT = "advanced-stt"
    VOICE_CONVERSION = "voice-conversion"
    SPEECH_ENHANCEMENT = "speech-enhancement"
    AUDIO_GENERATION = "audio-generation"
    MULTILINGUAL_TTS = "multilingual-tts"
    MULTILINGUAL_STT = "multilingual-stt"
    REAL_TIME_TRANSLATION = "real-time-translation"

    # --- Interactive avatar and video generation ---
    TALKING_AVATAR = "talking-avatar"
    AVATAR_GENERATION = "avatar-generation"
    LIP_SYNC = "lip-sync"
    FACIAL_ANIMATION = "facial-animation"
    GESTURE_GENERATION = "gesture-generation"
    VIRTUAL_PRESENTER = "virtual-presenter"
    AI_ANCHOR = "ai-anchor"

    # --- Interactive language and conversation ---
    INTERACTIVE_CHAT = "interactive-chat"
    BILINGUAL_CONVERSATION = "bilingual-conversation"
    CULTURAL_ADAPTATION = "cultural-adaptation"
    CONTEXT_AWARE_CHAT = "context-aware-chat"
    PERSONALITY_CHAT = "personality-chat"
    ROLE_PLAY_CHAT = "role-play-chat"
    DOMAIN_SPECIFIC_CHAT = "domain-specific-chat"

    # --- Qwen-specific categories ---
    QWEN_REASONING = "qwen-reasoning"
    QWEN_MATH = "qwen-math"
    QWEN_CODE = "qwen-code"
    QWEN_VISION = "qwen-vision"
    QWEN_AUDIO = "qwen-audio"

    # --- DeepSeek-specific categories ---
    DEEPSEEK_CODING = "deepseek-coding"
    DEEPSEEK_REASONING = "deepseek-reasoning"
    DEEPSEEK_MATH = "deepseek-math"
    DEEPSEEK_RESEARCH = "deepseek-research"
@dataclass
class HFModel:
    """Describe one Hugging Face model entry in the catalogue.

    The first four fields are required; the remaining four are optional
    and default to "off" / unset.
    """

    # Human-readable display name.
    name: str
    # Model identifier string (entries in this file use "org/model" style ids).
    model_id: str
    # Category tag for the model.
    category: ModelCategory
    # Short free-text description.
    description: str
    # Flag marking the model as endpoint compatible; defaults to False.
    endpoint_compatible: bool = False
    # Flag marking the model as requiring authentication; defaults to False.
    requires_auth: bool = False
    # Optional token limit; None when not specified.
    max_tokens: Optional[int] = None
    # Flag marking streaming support; defaults to False.
    supports_streaming: bool = False
class HuggingFaceModels:
"""Comprehensive collection of Hugging Face models for all categories"""
# Text Generation Models (Latest and Popular)
TEXT_GENERATION_MODELS = [
    # All entries share the TEXT_GENERATION category and have the
    # endpoint_compatible and supports_streaming flags set; only
    # requires_auth and max_tokens vary per row of the table below.
    HFModel(
        name=label,
        model_id=repo,
        category=ModelCategory.TEXT_GENERATION,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=needs_auth,
        max_tokens=token_limit,
        supports_streaming=True,
    )
    for label, repo, blurb, needs_auth, token_limit in (
        ("MiniMax-M2", "MiniMaxAI/MiniMax-M2",
         "Latest high-performance text generation model", False, 4096),
        ("Kimi Linear 48B", "moonshotai/Kimi-Linear-48B-A3B-Instruct",
         "Large instruction-tuned model with linear attention", False, 8192),
        ("GPT-OSS 20B", "openai/gpt-oss-20b",
         "Open-source GPT model by OpenAI", False, 4096),
        ("GPT-OSS 120B", "openai/gpt-oss-120b",
         "Large open-source GPT model", False, 4096),
        ("Granite 4.0 1B", "ibm-granite/granite-4.0-1b",
         "IBM's enterprise-grade small language model", False, 2048),
        ("GLM-4.6", "zai-org/GLM-4.6",
         "Multilingual conversational model", False, 4096),
        # The only entry in this list with requires_auth set.
        ("Llama 3.1 8B Instruct", "meta-llama/Llama-3.1-8B-Instruct",
         "Meta's instruction-tuned Llama model", True, 8192),
        ("Tongyi DeepResearch 30B", "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B",
         "Alibaba's research-focused large language model", False, 4096),
        ("EuroLLM 9B", "utter-project/EuroLLM-9B",
         "European multilingual language model", False, 4096),
    )
]
# Text-to-Image Models (Latest and Best)
TEXT_TO_IMAGE_MODELS = [
    # Uniform entries: TEXT_TO_IMAGE category, endpoint_compatible set,
    # requires_auth unset; max_tokens / supports_streaming keep defaults.
    HFModel(
        name=label,
        model_id=repo,
        category=ModelCategory.TEXT_TO_IMAGE,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, blurb in (
        ("FIBO", "briaai/FIBO", "Advanced text-to-image generation model"),
        ("FLUX.1 Dev", "black-forest-labs/FLUX.1-dev", "State-of-the-art image generation"),
        ("FLUX.1 Schnell", "black-forest-labs/FLUX.1-schnell",
         "Fast high-quality image generation"),
        ("Qwen Image", "Qwen/Qwen-Image", "Multilingual text-to-image model"),
        ("Stable Diffusion XL", "stabilityai/stable-diffusion-xl-base-1.0",
         "Popular high-resolution image generation"),
        ("Stable Diffusion 3.5 Large", "stabilityai/stable-diffusion-3.5-large",
         "Latest Stable Diffusion model"),
        ("HunyuanImage 3.0", "tencent/HunyuanImage-3.0",
         "Tencent's advanced image generation model"),
        ("Nitro-E", "amd/Nitro-E", "AMD's efficient image generation model"),
        ("Qwen Image Lightning", "lightx2v/Qwen-Image-Lightning",
         "Fast distilled image generation"),
    )
]
# Automatic Speech Recognition Models
ASR_MODELS = [
    # Uniform entries: AUTOMATIC_SPEECH_RECOGNITION category,
    # endpoint_compatible set, requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, blurb in (
        ("Whisper Large v3", "openai/whisper-large-v3",
         "OpenAI's best multilingual speech recognition"),
        ("Whisper Large v3 Turbo", "openai/whisper-large-v3-turbo",
         "Faster version of Whisper Large v3"),
        ("Parakeet TDT 0.6B v3", "nvidia/parakeet-tdt-0.6b-v3",
         "NVIDIA's multilingual ASR model"),
        ("Canary Qwen 2.5B", "nvidia/canary-qwen-2.5b",
         "NVIDIA's advanced ASR with Qwen integration"),
        ("Canary 1B v2", "nvidia/canary-1b-v2", "Compact multilingual ASR model"),
        ("Whisper Small", "openai/whisper-small", "Lightweight multilingual ASR"),
        ("Speaker Diarization 3.1", "pyannote/speaker-diarization-3.1",
         "Advanced speaker identification and diarization"),
    )
]
# Text-to-Speech Models
TTS_MODELS = [
    # Uniform entries: TEXT_TO_SPEECH category, endpoint_compatible set,
    # requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=ModelCategory.TEXT_TO_SPEECH,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, blurb in (
        ("SoulX Podcast 1.7B", "Soul-AILab/SoulX-Podcast-1.7B",
         "High-quality podcast-style speech synthesis"),
        ("NeuTTS Air", "neuphonic/neutts-air", "Advanced neural text-to-speech"),
        ("Kokoro 82M", "hexgrad/Kokoro-82M", "Lightweight high-quality TTS"),
        ("Kani TTS 400M EN", "nineninesix/kani-tts-400m-en",
         "English-focused text-to-speech model"),
        ("XTTS v2", "coqui/XTTS-v2", "Zero-shot voice cloning TTS"),
        ("Chatterbox", "ResembleAI/chatterbox", "Multilingual voice cloning"),
        ("VibeVoice 1.5B", "microsoft/VibeVoice-1.5B", "Microsoft's advanced TTS model"),
        ("OpenAudio S1 Mini", "fishaudio/openaudio-s1-mini", "Compact multilingual TTS"),
    )
]
# Image Classification Models
IMAGE_CLASSIFICATION_MODELS = [
    # Uniform entries: IMAGE_CLASSIFICATION category, endpoint_compatible
    # set, requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=ModelCategory.IMAGE_CLASSIFICATION,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, blurb in (
        ("NSFW Image Detection", "Falconsai/nsfw_image_detection",
         "Content safety image classification"),
        ("ViT Base Patch16", "google/vit-base-patch16-224", "Google's Vision Transformer"),
        ("Deepfake Detection", "dima806/deepfake_vs_real_image_detection",
         "Detect AI-generated vs real images"),
        ("Facial Emotions Detection", "dima806/facial_emotions_image_detection",
         "Recognize facial emotions"),
        ("SDXL Detector", "Organika/sdxl-detector",
         "Detect Stable Diffusion XL generated images"),
        ("ViT NSFW Detector", "AdamCodd/vit-base-nsfw-detector",
         "NSFW content detection with ViT"),
        ("ResNet 101", "microsoft/resnet-101", "Microsoft's ResNet for classification"),
    )
]
# Additional Categories
FEATURE_EXTRACTION_MODELS = [
    # Uniform entries: FEATURE_EXTRACTION category, endpoint_compatible
    # set, requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=ModelCategory.FEATURE_EXTRACTION,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, blurb in (
        ("Sentence Transformers All MiniLM", "sentence-transformers/all-MiniLM-L6-v2",
         "Lightweight sentence embeddings"),
        ("BGE Large EN", "BAAI/bge-large-en-v1.5", "High-quality English embeddings"),
        ("E5 Large v2", "intfloat/e5-large-v2", "Multilingual text embeddings"),
    )
]
TRANSLATION_MODELS = [
    # Uniform entries: TRANSLATION category, endpoint_compatible set,
    # requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=ModelCategory.TRANSLATION,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, blurb in (
        ("M2M100 1.2B", "facebook/m2m100_1.2B", "Multilingual machine translation"),
        ("NLLB 200 3.3B", "facebook/nllb-200-3.3B", "No Language Left Behind translation"),
        ("mBART Large 50", "facebook/mbart-large-50-many-to-many-mmt",
         "Multilingual BART for translation"),
    )
]
SUMMARIZATION_MODELS = [
    # Uniform entries: SUMMARIZATION category, endpoint_compatible set,
    # requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=ModelCategory.SUMMARIZATION,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, blurb in (
        ("PEGASUS XSum", "google/pegasus-xsum", "Abstractive summarization model"),
        ("BART Large CNN", "facebook/bart-large-cnn", "CNN/DailyMail summarization"),
        ("T5 Base", "t5-base", "Text-to-Text Transfer Transformer"),
    )
]
# Video Generation and Processing Models
VIDEO_GENERATION_MODELS = [
    # Category varies per row; every entry has endpoint_compatible set
    # and requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, kind, blurb in (
        # NOTE(review): described as image-to-video but tagged
        # TEXT_TO_VIDEO -- confirm the intended category.
        ("Stable Video Diffusion", "stabilityai/stable-video-diffusion-img2vid",
         ModelCategory.TEXT_TO_VIDEO, "Image-to-video generation model"),
        ("AnimateDiff", "guoyww/animatediff",
         ModelCategory.VIDEO_GENERATION, "Text-to-video animation generation"),
        ("VideoCrafter", "videogen/VideoCrafter",
         ModelCategory.TEXT_TO_VIDEO, "High-quality text-to-video generation"),
        ("Video ChatGPT", "mbzuai-oryx/Video-ChatGPT-7B",
         ModelCategory.VIDEO_TO_TEXT, "Video understanding and description"),
        ("Video-BLIP", "salesforce/video-blip-opt-2.7b",
         ModelCategory.VIDEO_CLASSIFICATION, "Video content analysis and classification"),
    )
]
# Code Generation and Development Models
CODE_GENERATION_MODELS = [
    # Category and requires_auth vary per row; every entry has
    # endpoint_compatible set.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=needs_auth,
    )
    for label, repo, kind, blurb, needs_auth in (
        # The only entry in this list with requires_auth set.
        ("CodeLlama 34B Instruct", "codellama/CodeLlama-34b-Instruct-hf",
         ModelCategory.CODE_GENERATION,
         "Large instruction-tuned code generation model", True),
        ("StarCoder2 15B", "bigcode/starcoder2-15b",
         ModelCategory.CODE_GENERATION, "Advanced code generation and completion", False),
        # NOTE(review): the display name says "V2" while the repo id names
        # the 6.7b-instruct checkpoint -- confirm which one is intended.
        ("DeepSeek Coder V2", "deepseek-ai/deepseek-coder-6.7b-instruct",
         ModelCategory.CODE_GENERATION, "Specialized coding assistant", False),
        ("WizardCoder 34B", "WizardLM/WizardCoder-Python-34B-V1.0",
         ModelCategory.CODE_GENERATION, "Python-focused code generation", False),
        ("Phind CodeLlama", "Phind/Phind-CodeLlama-34B-v2",
         ModelCategory.CODE_GENERATION,
         "Optimized for code explanation and debugging", False),
        ("Code T5+", "Salesforce/codet5p-770m",
         ModelCategory.CODE_COMPLETION, "Code understanding and generation", False),
        ("InCoder", "facebook/incoder-6B",
         ModelCategory.CODE_COMPLETION, "Bidirectional code generation", False),
    )
]
# 3D and AR/VR Content Generation Models
THREE_D_MODELS = [
    # Category varies per row; every entry has endpoint_compatible set
    # and requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, kind, blurb in (
        ("Shap-E", "openai/shap-e",
         ModelCategory.TEXT_TO_3D, "Text-to-3D shape generation"),
        ("Point-E", "openai/point-e",
         ModelCategory.TEXT_TO_3D, "Text-to-3D point cloud generation"),
        ("DreamFusion", "google/dreamfusion",
         ModelCategory.IMAGE_TO_3D, "Image-to-3D mesh generation"),
        ("Magic3D", "nvidia/magic3d",
         ModelCategory.THREE_D_GENERATION, "High-quality 3D content creation"),
        ("GET3D", "nvidia/get3d",
         ModelCategory.MESH_GENERATION, "3D mesh generation from text"),
    )
]
# Document Processing and OCR Models
DOCUMENT_PROCESSING_MODELS = [
    # Category varies per row; every entry has endpoint_compatible set
    # and requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, kind, blurb in (
        ("TrOCR Large", "microsoft/trocr-large-printed",
         ModelCategory.OCR, "Transformer-based OCR for printed text"),
        ("TrOCR Handwritten", "microsoft/trocr-large-handwritten",
         ModelCategory.HANDWRITING_RECOGNITION, "Handwritten text recognition"),
        ("LayoutLMv3", "microsoft/layoutlmv3-large",
         ModelCategory.DOCUMENT_ANALYSIS, "Document layout analysis and understanding"),
        ("Donut", "naver-clova-ix/donut-base",
         ModelCategory.DOCUMENT_ANALYSIS, "OCR-free document understanding"),
        ("TableTransformer", "microsoft/table-transformer-structure-recognition",
         ModelCategory.TABLE_EXTRACTION, "Table structure recognition"),
        ("FormNet", "microsoft/formnet",
         ModelCategory.FORM_PROCESSING, "Form understanding and processing"),
    )
]
# Multimodal AI Models
MULTIMODAL_MODELS = [
    # Category and requires_auth vary per row; every entry has
    # endpoint_compatible set.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=needs_auth,
    )
    for label, repo, kind, blurb, needs_auth in (
        ("BLIP-2", "Salesforce/blip2-opt-2.7b",
         ModelCategory.VISION_LANGUAGE,
         "Vision-language understanding and generation", False),
        ("InstructBLIP", "Salesforce/instructblip-vicuna-7b",
         ModelCategory.MULTIMODAL_REASONING,
         "Instruction-following multimodal model", False),
        ("LLaVA", "liuhaotian/llava-v1.5-7b",
         ModelCategory.VISUAL_QUESTION_ANSWERING,
         "Large Language and Vision Assistant", False),
        # The only entry in this list with requires_auth set.
        ("GPT-4V", "openai/gpt-4-vision-preview",
         ModelCategory.MULTIMODAL_CHAT,
         "Advanced multimodal conversational AI", True),
        ("Flamingo", "deepmind/flamingo-9b",
         ModelCategory.CROSS_MODAL_GENERATION,
         "Few-shot learning for vision and language", False),
    )
]
# Specialized AI Models
SPECIALIZED_AI_MODELS = [
    # Category varies per row; every entry has endpoint_compatible set
    # and requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, kind, blurb in (
        ("MusicGen", "facebook/musicgen-medium",
         ModelCategory.MUSIC_GENERATION, "Text-to-music generation"),
        ("AudioCraft", "facebook/audiocraft_musicgen_melody",
         ModelCategory.MUSIC_GENERATION, "Melody-conditioned music generation"),
        ("Real-ESRGAN", "xinntao/realesrgan-x4plus",
         ModelCategory.SUPER_RESOLUTION, "Image super-resolution"),
        ("GFPGAN", "TencentARC/GFPGAN",
         ModelCategory.FACE_RESTORATION, "Face restoration and enhancement"),
        ("LaMa", "advimman/lama",
         ModelCategory.IMAGE_INPAINTING, "Large Mask Inpainting"),
        ("Background Remover", "briaai/RMBG-1.4",
         ModelCategory.BACKGROUND_REMOVAL, "Automatic background removal"),
        ("Voice Cloner", "coqui/XTTS-v2",
         ModelCategory.VOICE_CLONING, "Multilingual voice cloning"),
    )
]
# Creative Content Models
CREATIVE_CONTENT_MODELS = [
    # Category and requires_auth vary per row; every entry has
    # endpoint_compatible set.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=needs_auth,
    )
    for label, repo, kind, blurb, needs_auth in (
        # The only entry in this list with requires_auth set.
        ("GPT-3.5 Creative", "openai/gpt-3.5-turbo-instruct",
         ModelCategory.CREATIVE_WRITING, "Creative writing and storytelling", True),
        ("Novel AI", "novelai/genji-python-6b",
         ModelCategory.STORY_GENERATION, "Interactive story generation", False),
        ("Poet Assistant", "gpt2-poetry",
         ModelCategory.POETRY_GENERATION, "Poetry generation and analysis", False),
        ("Blog Writer", "google/flan-t5-large",
         ModelCategory.BLOG_WRITING, "Blog content creation", False),
        ("Marketing Copy AI", "microsoft/DialoGPT-large",
         ModelCategory.MARKETING_COPY, "Marketing content generation", False),
    )
]
# Game Development Models
GAME_DEVELOPMENT_MODELS = [
    # Category varies per row; every entry has endpoint_compatible set
    # and requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, kind, blurb in (
        ("Character AI", "character-ai/character-generator",
         ModelCategory.CHARACTER_GENERATION, "Game character generation and design"),
        ("Level Designer", "unity/level-generator",
         ModelCategory.LEVEL_GENERATION, "Game level and environment generation"),
        ("Dialogue Writer", "bioware/dialogue-generator",
         ModelCategory.DIALOGUE_GENERATION, "Game dialogue and narrative generation"),
        ("Asset Creator", "epic/asset-generator",
         ModelCategory.GAME_ASSET_GENERATION, "Game asset and texture generation"),
    )
]
# Science and Research Models
SCIENCE_RESEARCH_MODELS = [
    # Category varies per row; every entry has endpoint_compatible set
    # and requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, kind, blurb in (
        ("AlphaFold", "deepmind/alphafold2",
         ModelCategory.PROTEIN_FOLDING, "Protein structure prediction"),
        ("ChemBERTa", "DeepChem/ChemBERTa-77M-MLM",
         ModelCategory.MOLECULE_GENERATION, "Chemical compound analysis"),
        ("SciBERT", "allenai/scibert_scivocab_uncased",
         ModelCategory.SCIENTIFIC_WRITING, "Scientific text understanding"),
        ("Research Assistant", "microsoft/specter2",
         ModelCategory.RESEARCH_ASSISTANCE, "Research paper analysis and recommendations"),
        ("Data Analyst", "microsoft/data-copilot",
         ModelCategory.DATA_ANALYSIS, "Automated data analysis and insights"),
    )
]
# Business and Productivity Models
BUSINESS_PRODUCTIVITY_MODELS = [
    # Category varies per row; every entry has endpoint_compatible set
    # and requires_auth unset.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
    )
    for label, repo, kind, blurb in (
        ("Email Assistant", "microsoft/email-generator",
         ModelCategory.EMAIL_GENERATION, "Professional email composition"),
        ("Presentation AI", "gamma/presentation-generator",
         ModelCategory.PRESENTATION_CREATION, "Automated presentation creation"),
        ("Report Writer", "openai/report-generator",
         ModelCategory.REPORT_GENERATION, "Business report generation"),
        ("Meeting Summarizer", "microsoft/meeting-summarizer",
         ModelCategory.MEETING_SUMMARIZATION, "Meeting notes and action items"),
        ("Project Planner", "atlassian/project-ai",
         ModelCategory.PROJECT_PLANNING, "Project planning and management"),
    )
]
# AI Teacher Models - Best-in-Class Educational AI System
AI_TEACHER_MODELS = [
    # Every entry has endpoint_compatible set and requires_auth unset;
    # category, max_tokens and supports_streaming come from the table rows.
    HFModel(
        name=label,
        model_id=repo,
        category=kind,
        description=blurb,
        endpoint_compatible=True,
        requires_auth=False,
        max_tokens=token_limit,
        supports_streaming=streams,
    )
    for label, repo, kind, blurb, token_limit, streams in (
        # -- Conversational tutoring --
        ("AI Tutor Interactive", "microsoft/DialoGPT-medium",
         ModelCategory.AI_TUTORING,
         "Interactive AI tutor for conversational learning with dialogue management",
         2048, True),
        ("Goal-Oriented Tutor", "microsoft/GODEL-v1_1-large-seq2seq",
         ModelCategory.AI_TUTORING,
         "Goal-oriented conversational AI for personalized tutoring sessions",
         2048, True),
        ("Advanced Instruction Tutor", "google/flan-t5-large",
         ModelCategory.AI_TUTORING,
         "Advanced instruction-following AI tutor for complex educational tasks",
         2048, True),
        # -- Educational content generation --
        ("Educational Content Creator Pro", "facebook/bart-large",
         ModelCategory.EDUCATIONAL_CONTENT,
         "Professional educational content generation for all learning levels",
         1024, False),
        ("Multilingual Education AI", "bigscience/bloom-560m",
         ModelCategory.EDUCATIONAL_CONTENT,
         "Global multilingual educational content for diverse learners",
         2048, True),
        ("Academic Writing Assistant", "microsoft/prophetnet-large-uncased",
         ModelCategory.EDUCATIONAL_CONTENT,
         "Academic content creation with advanced text generation capabilities",
         1024, False),
        # -- Lesson planning and curriculum design --
        ("Master Lesson Planner", "facebook/bart-large-cnn",
         ModelCategory.LESSON_PLANNING,
         "Comprehensive lesson planning with summarization and structure",
         1024, False),
        ("Curriculum Architect", "microsoft/prophetnet-base-uncased",
         ModelCategory.CURRICULUM_DESIGN,
         "Professional curriculum planning and educational program design",
         1024, False),
        # NOTE(review): SUMMARIZATION_MODELS in this file refers to T5 Base
        # as "t5-base"; confirm that "google/t5-base" resolves as a repo id.
        ("Activity Designer", "google/t5-base",
         ModelCategory.LESSON_PLANNING,
         "Interactive learning activity and exercise generation",
         512, True),
        # -- Programming instruction --
        ("Programming Mentor Pro", "microsoft/codebert-base",
         ModelCategory.CODING_INSTRUCTION,
         "Expert programming education with code analysis and explanation",
         1024, False),
        ("Advanced Code Instructor", "microsoft/graphcodebert-base",
         ModelCategory.CODING_INSTRUCTION,
         "Advanced programming instruction with graph understanding",
         1024, False),
        ("Algorithm Tutor Elite", "microsoft/unixcoder-base",
         ModelCategory.CODING_INSTRUCTION,
         "Elite algorithm education and computational thinking development",
         1024, False),
        # -- Science and mathematics --
        ("Science Research Educator", "allenai/scibert_scivocab_uncased",
         ModelCategory.SCIENCE_TUTORING,
         "Scientific education with research-grade knowledge and vocabulary",
         512, False),
        ("Advanced Science AI", "facebook/galactica-125m",
         ModelCategory.SCIENCE_TUTORING,
         "Advanced scientific knowledge and research methodology education",
         2048, True),
        ("Mathematical Reasoning Master", "google/flan-t5-xl",
         ModelCategory.MATH_TUTORING,
         "Advanced mathematical reasoning, proofs, and problem-solving",
         2048, True),
        ("Interactive Math Tutor", "microsoft/DialoGPT-small",
         ModelCategory.MATH_TUTORING,
         "Interactive mathematics tutoring with step-by-step explanations",
         1024, True),
        # -- Language and literature --
        ("Multilingual Language Master", "facebook/mbart-large-50-many-to-many-mmt",
         ModelCategory.LANGUAGE_TUTORING,
         "Advanced multilingual education and cross-language learning",
         1024, False),
        ("Literature & Language AI", "microsoft/prophetnet-large-uncased-cnndm",
         ModelCategory.LANGUAGE_TUTORING,
         "Literature analysis and advanced language instruction",
         1024, False),
        ("Grammar & Comprehension Expert", "google/electra-base-discriminator",
         ModelCategory.LANGUAGE_TUTORING,
         "Expert grammar instruction and reading comprehension development",
         512, False),
        # -- Assessment and evaluation --
        ("Assessment Designer Pro", "microsoft/DialoGPT-large",
         ModelCategory.QUIZ_GENERATION,
         "Professional assessment and quiz generation with interaction",
         2048, True),
        ("Learning Progress Analyzer", "facebook/bart-large",
         ModelCategory.LEARNING_ASSESSMENT,
         "Comprehensive learning assessment and progress tracking",
         1024, False),
        # NOTE(review): same "google/t5-base" id as "Activity Designer"
        # above -- confirm it resolves.
        ("Question Master AI", "google/t5-base",
         ModelCategory.QUIZ_GENERATION,
         "Intelligent question generation for all educational levels",
         512, True),
        ("Exam Preparation Specialist", "microsoft/unilm-base-cased",
         ModelCategory.EXAM_PREPARATION,
         "Specialized exam preparation and test strategy development",
         1024, False),
        # -- Personalized and adaptive learning --
        ("Personal Learning Architect", "microsoft/deberta-v3-base",
         ModelCategory.PERSONALIZED_LEARNING,
         "Advanced personalized learning path creation and optimization",
         512, False),
        ("Adaptive Learning Engine", "facebook/opt-125m",
         ModelCategory.ADAPTIVE_LEARNING,
         "Intelligent adaptive learning with dynamic content adjustment",
         2048, True),
        ("Learning Analytics Expert", "microsoft/layoutlm-base-uncased",
         ModelCategory.LEARNING_ANALYTICS,
         "Advanced learning analytics and educational data interpretation",
         512, False),
        # -- Concept explanation --
        ("Concept Explanation Master", "microsoft/deberta-v3-base",
         ModelCategory.CONCEPT_EXPLANATION,
         "Master-level concept explanation and knowledge breakdown",
         512, False),
        ("Knowledge Synthesizer", "google/pegasus-xsum",
         ModelCategory.CONCEPT_EXPLANATION,
         "Advanced knowledge synthesis and concept summarization",
         512, False),
        ("Interactive Concept Guide", "facebook/bart-base",
         ModelCategory.CONCEPT_EXPLANATION,
         "Interactive concept teaching with clarification and examples",
         1024, False),
        # -- Homework and study support --
        ("Programming Homework Expert", "microsoft/codebert-base-mlm",
         ModelCategory.HOMEWORK_ASSISTANCE,
         "Expert programming homework assistance and debugging support",
         1024, False),
        ("Universal Homework Helper", "google/flan-t5-small",
         ModelCategory.HOMEWORK_ASSISTANCE,
         "Comprehensive homework assistance across all academic subjects",
         1024, True),
        ("Global Study Assistant", "facebook/mbart-large-cc25",
         ModelCategory.HOMEWORK_ASSISTANCE,
         "Multilingual homework support with cultural context understanding",
         1024, False),
        # -- Study materials and resources --
        ("Study Guide Architect", "microsoft/prophetnet-large-uncased",
         ModelCategory.STUDY_GUIDE_CREATION,
         "Professional study guide creation and learning material development",
         1024, False),
        ("Educational Resource Creator", "facebook/bart-large-xsum",
         ModelCategory.STUDY_GUIDE_CREATION,
         "Comprehensive educational resource and reference material creation",
         1024, False),
        # -- Interactive learning and gamification --
        ("Educational Game Designer", "microsoft/DialoGPT-base",
         ModelCategory.EDUCATIONAL_GAMES,
         "Interactive educational games and gamified learning experiences",
         1024, True),
        ("Learning Game Engine", "google/bert-base-uncased",
         ModelCategory.EDUCATIONAL_GAMES,
         "Educational game mechanics and interactive learning systems",
         512, False),
        # -- History and social studies --
        ("History Professor AI", "microsoft/deberta-large",
         ModelCategory.HISTORY_TUTORING,
         "Professor-level historical analysis and social studies education",
         1024, False),
        ("Interactive History Guide", "facebook/opt-350m",
         ModelCategory.HISTORY_TUTORING,
         "Interactive historical narratives and timeline exploration",
         2048, True),
        # -- Multi-subject teaching --
        ("Master Subject Teacher", "google/flan-t5-base",
         ModelCategory.SUBJECT_TEACHING,
         "Expert multi-subject teaching with instruction-following excellence",
         1024, True),
        ("Universal Educator AI", "microsoft/unilm-large-cased",
         ModelCategory.SUBJECT_TEACHING,
         "Universal education AI with cross-disciplinary knowledge",
         1024, False),
        # -- Analytics and optimization --
        ("Advanced Learning Analytics", "microsoft/layoutlm-large-uncased",
         ModelCategory.LEARNING_ANALYTICS,
         "Enterprise-level learning analytics and educational insights",
         1024, False),
        ("Personalization Engine Pro", "google/electra-large-discriminator",
         ModelCategory.PERSONALIZED_LEARNING,
         "Advanced AI personalization with learning style adaptation",
         512, False),
        ("Global Adaptive System", "facebook/mbart-large-50",
         ModelCategory.ADAPTIVE_LEARNING,
         "Global adaptive learning system with multilingual capabilities",
         1024, False),
    )
]
# Qwen Models - Advanced Reasoning and Multimodal AI
# Catalog of Qwen-family checkpoints, grouped below by series: Qwen2.5
# general-purpose, Qwen2.5-Coder, Qwen2.5-Math, QwQ, Qwen2-VL, Qwen2-Audio and
# the older Qwen1.5 chat models.
# Every HFModel(...) is built positionally: display name, Hub repo id,
# ModelCategory, description, two booleans, an integer and a final boolean.
# NOTE(review): the field names behind the last four values live on HFModel,
# which is defined outside this section - presumably an API-compatibility flag,
# an auth-required flag, a context/token limit and a streaming flag; confirm
# against the HFModel definition before relying on that reading.
QWEN_MODELS = [
# Qwen2.5 Series - Latest Models
HFModel(
"Qwen2.5-72B-Instruct",
"Qwen/Qwen2.5-72B-Instruct",
ModelCategory.TEXT_GENERATION,
"Large-scale instruction-following model for complex reasoning",
True,
False,
32768,
True,
),
HFModel(
"Qwen2.5-32B-Instruct",
"Qwen/Qwen2.5-32B-Instruct",
ModelCategory.TEXT_GENERATION,
"High-performance instruction model for advanced tasks",
True,
False,
32768,
True,
),
HFModel(
"Qwen2.5-14B-Instruct",
"Qwen/Qwen2.5-14B-Instruct",
ModelCategory.TEXT_GENERATION,
"Efficient large model with excellent reasoning capabilities",
True,
False,
32768,
True,
),
HFModel(
"Qwen2.5-7B-Instruct",
"Qwen/Qwen2.5-7B-Instruct",
ModelCategory.TEXT_GENERATION,
"Optimized 7B model for general-purpose applications",
True,
False,
32768,
True,
),
HFModel(
"Qwen2.5-3B-Instruct",
"Qwen/Qwen2.5-3B-Instruct",
ModelCategory.TEXT_GENERATION,
"Lightweight model for resource-constrained environments",
True,
False,
32768,
True,
),
HFModel(
"Qwen2.5-1.5B-Instruct",
"Qwen/Qwen2.5-1.5B-Instruct",
ModelCategory.TEXT_GENERATION,
"Ultra-lightweight model for edge deployment",
True,
False,
32768,
True,
),
HFModel(
"Qwen2.5-0.5B-Instruct",
"Qwen/Qwen2.5-0.5B-Instruct",
ModelCategory.TEXT_GENERATION,
"Minimal footprint model for basic applications",
True,
False,
32768,
True,
),
# Qwen2.5-Coder Series - Programming Specialists
HFModel(
"Qwen2.5-Coder-32B-Instruct",
"Qwen/Qwen2.5-Coder-32B-Instruct",
ModelCategory.QWEN_CODE,
"Advanced code generation and programming assistance",
True,
False,
131072,
True,
),
HFModel(
"Qwen2.5-Coder-14B-Instruct",
"Qwen/Qwen2.5-Coder-14B-Instruct",
ModelCategory.QWEN_CODE,
"Code generation with excellent debugging capabilities",
True,
False,
131072,
True,
),
HFModel(
"Qwen2.5-Coder-7B-Instruct",
"Qwen/Qwen2.5-Coder-7B-Instruct",
ModelCategory.QWEN_CODE,
"Efficient coding assistant for multiple languages",
True,
False,
131072,
True,
),
HFModel(
"Qwen2.5-Coder-3B-Instruct",
"Qwen/Qwen2.5-Coder-3B-Instruct",
ModelCategory.QWEN_CODE,
"Lightweight programming assistant",
True,
False,
131072,
True,
),
HFModel(
"Qwen2.5-Coder-1.5B-Instruct",
"Qwen/Qwen2.5-Coder-1.5B-Instruct",
ModelCategory.QWEN_CODE,
"Compact code generation model",
True,
False,
131072,
True,
),
# Qwen2.5-Math Series - Mathematical Reasoning
HFModel(
"Qwen2.5-Math-72B-Instruct",
"Qwen/Qwen2.5-Math-72B-Instruct",
ModelCategory.QWEN_MATH,
"Advanced mathematical problem solving and reasoning",
True,
False,
32768,
True,
),
HFModel(
"Qwen2.5-Math-7B-Instruct",
"Qwen/Qwen2.5-Math-7B-Instruct",
ModelCategory.QWEN_MATH,
"Mathematical reasoning and calculation assistance",
True,
False,
32768,
True,
),
HFModel(
"Qwen2.5-Math-1.5B-Instruct",
"Qwen/Qwen2.5-Math-1.5B-Instruct",
ModelCategory.QWEN_MATH,
"Compact mathematical problem solver",
True,
False,
32768,
True,
),
# QwQ Series - Reasoning Specialists
HFModel(
"QwQ-32B-Preview",
"Qwen/QwQ-32B-Preview",
ModelCategory.QWEN_REASONING,
"Advanced reasoning and logical thinking model",
True,
False,
32768,
True,
),
# Qwen2-VL Series - Vision-Language Models
HFModel(
"Qwen2-VL-72B-Instruct",
"Qwen/Qwen2-VL-72B-Instruct",
ModelCategory.QWEN_VISION,
"Large-scale vision-language understanding and generation",
True,
False,
32768,
True,
),
HFModel(
"Qwen2-VL-7B-Instruct",
"Qwen/Qwen2-VL-7B-Instruct",
ModelCategory.QWEN_VISION,
"Efficient vision-language model for multimodal tasks",
True,
False,
32768,
True,
),
HFModel(
"Qwen2-VL-2B-Instruct",
"Qwen/Qwen2-VL-2B-Instruct",
ModelCategory.QWEN_VISION,
"Lightweight vision-language model",
True,
False,
32768,
True,
),
# Qwen2-Audio Series - Audio Understanding
HFModel(
"Qwen2-Audio-7B-Instruct",
"Qwen/Qwen2-Audio-7B-Instruct",
ModelCategory.QWEN_AUDIO,
"Advanced audio understanding and generation",
True,
False,
32768,
True,
),
# Qwen Legacy Models - Still Powerful
HFModel(
"Qwen1.5-110B-Chat",
"Qwen/Qwen1.5-110B-Chat",
ModelCategory.CONVERSATIONAL,
"Large conversational model with broad knowledge",
True,
False,
32768,
True,
),
HFModel(
"Qwen1.5-72B-Chat",
"Qwen/Qwen1.5-72B-Chat",
ModelCategory.CONVERSATIONAL,
"Conversational AI with excellent reasoning",
True,
False,
32768,
True,
),
HFModel(
"Qwen1.5-32B-Chat",
"Qwen/Qwen1.5-32B-Chat",
ModelCategory.CONVERSATIONAL,
"Efficient chat model for interactive applications",
True,
False,
32768,
True,
),
HFModel(
"Qwen1.5-14B-Chat",
"Qwen/Qwen1.5-14B-Chat",
ModelCategory.CONVERSATIONAL,
"Balanced performance chat model",
True,
False,
32768,
True,
),
HFModel(
"Qwen1.5-7B-Chat",
"Qwen/Qwen1.5-7B-Chat",
ModelCategory.CONVERSATIONAL,
"Popular chat model with good performance",
True,
False,
32768,
True,
),
HFModel(
"Qwen1.5-4B-Chat",
"Qwen/Qwen1.5-4B-Chat",
ModelCategory.CONVERSATIONAL,
"Lightweight conversational AI",
True,
False,
32768,
True,
),
]
# DeepSeek Models - Coding and Reasoning Excellence
# Catalog of DeepSeek checkpoints, grouped below by series: V3, V2.5, Coder,
# Math, LLM chat and VL (vision-language).
# Every HFModel(...) is built positionally: display name, Hub repo id,
# ModelCategory, description, two booleans, an integer and a final boolean.
# NOTE(review): the meaning of the last four values is defined by HFModel,
# which is outside this section - presumably API-compatibility, auth-required,
# context/token limit and streaming support; confirm against HFModel.
DEEPSEEK_MODELS = [
# DeepSeek-V3 Series - Latest Generation
HFModel(
"DeepSeek-V3",
"deepseek-ai/DeepSeek-V3",
ModelCategory.DEEPSEEK_REASONING,
"Latest generation reasoning and knowledge model",
True,
False,
65536,
True,
),
HFModel(
"DeepSeek-V3-Base",
"deepseek-ai/DeepSeek-V3-Base",
ModelCategory.TEXT_GENERATION,
"Foundation model for various downstream tasks",
True,
False,
65536,
True,
),
# DeepSeek-V2.5 Series
HFModel(
"DeepSeek-V2.5",
"deepseek-ai/DeepSeek-V2.5",
ModelCategory.DEEPSEEK_REASONING,
"Advanced reasoning and general intelligence model",
True,
False,
32768,
True,
),
# DeepSeek-Coder Series - Programming Specialists
HFModel(
"DeepSeek-Coder-V2-Instruct",
"deepseek-ai/DeepSeek-Coder-V2-Instruct",
ModelCategory.DEEPSEEK_CODING,
"Advanced code generation and programming assistance",
True,
False,
163840,
True,
),
HFModel(
"DeepSeek-Coder-V2-Base",
"deepseek-ai/DeepSeek-Coder-V2-Base",
ModelCategory.DEEPSEEK_CODING,
"Foundation coding model for fine-tuning",
True,
False,
163840,
True,
),
HFModel(
"DeepSeek-Coder-33B-Instruct",
"deepseek-ai/deepseek-coder-33b-instruct",
ModelCategory.DEEPSEEK_CODING,
"Large-scale code generation and debugging",
True,
False,
16384,
True,
),
HFModel(
"DeepSeek-Coder-6.7B-Instruct",
"deepseek-ai/deepseek-coder-6.7b-instruct",
ModelCategory.DEEPSEEK_CODING,
"Efficient code assistance and generation",
True,
False,
16384,
True,
),
HFModel(
"DeepSeek-Coder-1.3B-Instruct",
"deepseek-ai/deepseek-coder-1.3b-instruct",
ModelCategory.DEEPSEEK_CODING,
"Lightweight coding assistant",
True,
False,
16384,
True,
),
# DeepSeek-Math Series - Mathematical Reasoning
HFModel(
"DeepSeek-Math-7B-Instruct",
"deepseek-ai/deepseek-math-7b-instruct",
ModelCategory.DEEPSEEK_MATH,
"Mathematical problem solving and reasoning",
True,
False,
4096,
True,
),
HFModel(
"DeepSeek-Math-7B-Base",
"deepseek-ai/deepseek-math-7b-base",
ModelCategory.DEEPSEEK_MATH,
"Foundation model for mathematical reasoning",
True,
False,
4096,
True,
),
# DeepSeek Chat Models
HFModel(
"DeepSeek-67B-Chat",
"deepseek-ai/deepseek-llm-67b-chat",
ModelCategory.CONVERSATIONAL,
"Large conversational model with strong reasoning",
True,
False,
4096,
True,
),
HFModel(
"DeepSeek-7B-Chat",
"deepseek-ai/deepseek-llm-7b-chat",
ModelCategory.CONVERSATIONAL,
"Efficient chat model for general conversations",
True,
False,
4096,
True,
),
# DeepSeek-VL Series - Vision-Language
HFModel(
"DeepSeek-VL-7B-Chat",
"deepseek-ai/deepseek-vl-7b-chat",
ModelCategory.VISION_LANGUAGE,
"Vision-language understanding and conversation",
True,
False,
4096,
True,
),
HFModel(
"DeepSeek-VL-1.3B-Chat",
"deepseek-ai/deepseek-vl-1.3b-chat",
ModelCategory.VISION_LANGUAGE,
"Lightweight vision-language model",
True,
False,
4096,
True,
),
]
# Advanced Image Editing Models
# Catalog of image-editing checkpoints: inpainting, face enhancement and
# restoration, upscaling, background removal, colour correction and artistic
# filters. Every HFModel(...) is built positionally: display name, repo id,
# ModelCategory, description, two booleans, an integer and a final boolean.
# NOTE(review): the integer here takes values such as 1024/512/320/224, which
# look like image side lengths in pixels rather than a token limit - confirm
# how HFModel (defined outside this section) interprets that field.
# NOTE(review): some repo ids do not obviously match their label, e.g.
# "pytorch/vision" for style transfer and a BEiT checkpoint for colorization;
# verify that each id exists on the Hub and serves the described task.
IMAGE_EDITING_MODELS = [
# Professional Image Editing
HFModel(
"SDXL Inpainting",
"diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
ModelCategory.IMAGE_EDITING,
"High-quality image inpainting and editing",
True,
False,
1024,
False,
),
HFModel(
"ControlNet Inpainting",
"lllyasviel/control_v11p_sd15_inpaint",
ModelCategory.IMAGE_EDITING,
"Controllable image inpainting with precise editing",
True,
False,
512,
False,
),
HFModel(
"InstantID Face Editor",
"InstantX/InstantID",
ModelCategory.FACE_ENHANCEMENT,
"Identity-preserving face editing and enhancement",
True,
False,
512,
False,
),
HFModel(
"Real-ESRGAN Upscaler",
"ai-forever/Real-ESRGAN",
ModelCategory.IMAGE_UPSCALING,
"Advanced image super-resolution and enhancement",
True,
False,
1024,
False,
),
HFModel(
"GFPGAN Face Restoration",
"Xintao/GFPGAN",
ModelCategory.FACE_RESTORATION,
"High-quality face restoration and enhancement",
True,
False,
512,
False,
),
HFModel(
"CodeFormer Face Restoration",
"sczhou/CodeFormer",
ModelCategory.FACE_RESTORATION,
"Robust face restoration for low-quality images",
True,
False,
512,
False,
),
HFModel(
"Background Removal",
"briaai/RMBG-1.4",
ModelCategory.BACKGROUND_REMOVAL,
"Precise background removal and segmentation",
True,
False,
1024,
False,
),
HFModel(
"U2-Net Background Removal",
"simonw/u2net-portrait-segmentation",
ModelCategory.BACKGROUND_REMOVAL,
"Portrait and object background removal",
True,
False,
320,
False,
),
HFModel(
"Photo Colorization",
"microsoft/beit-base-patch16-224-pt22k-ft22k",
ModelCategory.COLOR_CORRECTION,
"AI-powered photo colorization and enhancement",
True,
False,
224,
False,
),
HFModel(
"Style Transfer Neural",
"pytorch/vision",
ModelCategory.ARTISTIC_FILTER,
"Neural style transfer for artistic image effects",
True,
False,
512,
False,
),
]
# Face Swap and Manipulation Models
# Catalog of face-swapping, face-enhancement and facial-animation checkpoints.
# Every HFModel(...) is built positionally: display name, repo id,
# ModelCategory, description, two booleans, an integer and a final boolean.
# NOTE(review): the integer (128/224/112/512/1024/256) looks like an input
# resolution in pixels rather than a token limit - confirm against HFModel,
# which is defined outside this section.
# NOTE(review): "deepinsight/inswapper_128.onnx" reads like a file name rather
# than a Hub repo id; verify that the ids in this list resolve on the Hub.
FACE_SWAP_MODELS = [
# Advanced Face Swapping
HFModel(
"InsightFace SwapFace",
"deepinsight/inswapper_128.onnx",
ModelCategory.FACE_SWAP,
"High-quality face swapping with identity preservation",
True,
False,
128,
False,
),
HFModel(
"SimSwap Face Swap",
"ppogg/simswap_official",
ModelCategory.FACE_SWAP,
"Realistic face swapping for videos and images",
True,
False,
224,
False,
),
HFModel(
"FaceX-Zoo Face Swap",
"FacePerceiver/FaceX-Zoo",
ModelCategory.FACE_SWAP,
"Multi-purpose face analysis and swapping toolkit",
True,
False,
112,
False,
),
HFModel(
"Face Enhancement Pro",
"TencentARC/GFPGAN",
ModelCategory.FACE_ENHANCEMENT,
"Professional face enhancement and restoration",
True,
False,
512,
False,
),
HFModel(
"DualStyleGAN Face Edit",
"williamyang1991/DualStyleGAN",
ModelCategory.FACE_ENHANCEMENT,
"Style-controllable face image editing",
True,
False,
1024,
False,
),
HFModel(
"MegaPortraits Face Animate",
"NVlabs/MegaPortraits",
ModelCategory.FACIAL_ANIMATION,
"One-shot facial animation and expression transfer",
True,
False,
256,
False,
),
]
# Advanced TTS and STT Models
# Catalog of speech checkpoints: text-to-speech (incl. multilingual and voice
# cloning), speech-to-text, speech translation and voice conversion.
# Every HFModel(...) is built positionally: display name, repo id,
# ModelCategory, description, two booleans, an integer and a final boolean.
# NOTE(review): the integer is not used uniformly in this list - values such
# as 24000/22050/16000/32000 look like audio sample rates in Hz, while the
# Whisper entries use 30, presumably the chunk length in seconds. Confirm what
# HFModel (defined outside this section) expects in that field.
ADVANCED_SPEECH_MODELS = [
# Multilingual Text-to-Speech
HFModel(
"XTTS v2 Multilingual",
"coqui/XTTS-v2",
ModelCategory.MULTILINGUAL_TTS,
"High-quality multilingual text-to-speech with voice cloning",
True,
False,
24000,
True,
),
HFModel(
"Bark Text-to-Speech",
"suno/bark",
ModelCategory.ADVANCED_TTS,
"Generative TTS with music, sound effects, and multiple speakers",
True,
False,
24000,
False,
),
HFModel(
"SpeechT5 TTS",
"microsoft/speecht5_tts",
ModelCategory.ADVANCED_TTS,
"High-quality neural text-to-speech synthesis",
True,
False,
16000,
False,
),
HFModel(
"VALL-E X Multilingual",
"Plachtaa/VALL-E-X",
ModelCategory.MULTILINGUAL_TTS,
"Zero-shot voice synthesis in multiple languages",
True,
False,
24000,
False,
),
HFModel(
"Arabic TTS",
"arabic-speech-corpus/tts-arabic",
ModelCategory.MULTILINGUAL_TTS,
"High-quality Arabic text-to-speech synthesis",
True,
False,
22050,
False,
),
HFModel(
"Tortoise TTS",
"jbetker/tortoise-tts",
ModelCategory.VOICE_CLONING,
"High-quality voice cloning and synthesis",
True,
False,
22050,
False,
),
# Advanced Speech-to-Text
HFModel(
"Whisper Large v3",
"openai/whisper-large-v3",
ModelCategory.MULTILINGUAL_STT,
"State-of-the-art multilingual speech recognition",
True,
False,
30,
False,
),
HFModel(
"Whisper Large v3 Turbo",
"openai/whisper-large-v3-turbo",
ModelCategory.MULTILINGUAL_STT,
"Fast multilingual speech recognition with high accuracy",
True,
False,
30,
True,
),
HFModel(
"Arabic Whisper",
"arabic-speech-corpus/whisper-large-arabic",
ModelCategory.MULTILINGUAL_STT,
"Optimized Arabic speech recognition model",
True,
False,
30,
False,
),
HFModel(
"MMS Speech Recognition",
"facebook/mms-1b-all",
ModelCategory.MULTILINGUAL_STT,
"Massively multilingual speech recognition (1000+ languages)",
True,
False,
16000,
False,
),
HFModel(
"Wav2Vec2 Arabic",
"facebook/wav2vec2-large-xlsr-53-arabic",
ModelCategory.MULTILINGUAL_STT,
"Arabic speech recognition with Wav2Vec2 architecture",
True,
False,
16000,
False,
),
HFModel(
"SpeechT5 ASR",
"microsoft/speecht5_asr",
ModelCategory.ADVANCED_STT,
"Advanced automatic speech recognition",
True,
False,
16000,
False,
),
# Real-time Translation and Voice Conversion
HFModel(
"SeamlessM4T",
"facebook/seamless-m4t-v2-large",
ModelCategory.REAL_TIME_TRANSLATION,
"Multilingual speech-to-speech translation",
True,
False,
16000,
True,
),
HFModel(
"Voice Conversion VITS",
"jaywalnut310/vits-ljs",
ModelCategory.VOICE_CONVERSION,
"High-quality voice conversion and synthesis",
True,
False,
22050,
False,
),
# NOTE(review): labelled "RVC" but the repo id is GPT-SoVITS - confirm which
# model is intended.
HFModel(
"RVC Voice Clone",
"lj1995/GPT-SoVITS",
ModelCategory.VOICE_CLONING,
"Real-time voice cloning and conversion",
True,
False,
32000,
True,
),
]
# Talking Avatar and Video Generation Models
# Catalog of talking-head, lip-sync, facial-animation, virtual-presenter and
# gesture-generation checkpoints.
# Every HFModel(...) is built positionally: display name, repo id,
# ModelCategory, description, two booleans, an integer and a final boolean.
# NOTE(review): the integer (256/512/224/96/320) looks like a frame resolution
# in pixels rather than a token limit - confirm against HFModel, which is
# defined outside this section.
# NOTE(review): several repo ids do not obviously match their label (e.g.
# "ZhengPeng7/BiSeNet" for gesture generation, "microsoft/DiT-XL-2-256" for a
# news anchor); verify that each id exists on the Hub and fits the task.
TALKING_AVATAR_MODELS = [
# Talking Head Generation
HFModel(
"SadTalker Talking Head",
"vinthony/SadTalker",
ModelCategory.TALKING_AVATAR,
"Generate talking head videos from audio and single image",
True,
False,
256,
False,
),
HFModel(
"Real-Time Face Animation",
"PaddlePaddle/PaddleGAN-FOM",
ModelCategory.FACIAL_ANIMATION,
"Real-time facial animation and expression control",
True,
False,
256,
True,
),
HFModel(
"LivePortrait Animation",
"KwaiVGI/LivePortrait",
ModelCategory.TALKING_AVATAR,
"High-quality portrait animation with lip sync",
True,
False,
512,
False,
),
HFModel(
"DualTalker Video",
"OpenTalker/DualTalker",
ModelCategory.TALKING_AVATAR,
"Dual-modal talking face generation with enhanced quality",
True,
False,
256,
False,
),
HFModel(
"Video Retalking",
"vinthony/video-retalking",
ModelCategory.LIP_SYNC,
"Audio-driven lip sync for existing videos",
True,
False,
224,
False,
),
HFModel(
"Wav2Lip Lip Sync",
"Rudrabha/Wav2Lip",
ModelCategory.LIP_SYNC,
"Accurate lip sync generation from audio",
True,
False,
96,
False,
),
HFModel(
"Digital Human Avatar",
"modelscope/damo-text-to-video-synthesis",
ModelCategory.VIRTUAL_PRESENTER,
"Generate digital human presenter videos",
True,
False,
320,
False,
),
HFModel(
"AI News Anchor",
"microsoft/DiT-XL-2-256",
ModelCategory.AI_ANCHOR,
"Professional AI news anchor and presenter generation",
True,
False,
256,
False,
),
HFModel(
"Avatar Gesture Control",
"ZhengPeng7/BiSeNet",
ModelCategory.GESTURE_GENERATION,
"Generate natural gestures and body language for avatars",
True,
False,
512,
False,
),
]
# Interactive Language Models (English-Arabic Focus)
# Catalog of conversational checkpoints with an English/Arabic emphasis:
# bilingual chat, context/persona/role-play chat and Arabic specialists.
# Every HFModel(...) is built positionally: display name, Hub repo id,
# ModelCategory, description, two booleans, an integer and a final boolean.
# NOTE(review): the meaning of the last four values is defined by HFModel,
# which is outside this section - presumably API-compatibility, auth-required,
# context/token limit and streaming support; confirm against HFModel.
INTERACTIVE_LANGUAGE_MODELS = [
# Bilingual Conversation Models
HFModel(
"AceGPT Arabic-English",
"FreedomIntelligence/AceGPT-13B",
ModelCategory.BILINGUAL_CONVERSATION,
"Bilingual Arabic-English conversation model",
True,
False,
4096,
True,
),
HFModel(
"Jais Arabic Chat",
"core42/jais-13b-chat",
ModelCategory.BILINGUAL_CONVERSATION,
"Advanced Arabic conversation model with English support",
True,
False,
2048,
True,
),
HFModel(
"AraBART Conversational",
"aubmindlab/arabart-base-conversational",
ModelCategory.BILINGUAL_CONVERSATION,
"Arabic conversational AI with cultural understanding",
True,
False,
1024,
True,
),
HFModel(
"Multilingual Chat Assistant",
"microsoft/DialoGPT-large",
ModelCategory.INTERACTIVE_CHAT,
"Interactive chat assistant supporting multiple languages",
True,
False,
1024,
True,
),
HFModel(
"Cultural Context Chat",
"bigscience/bloom-7b1",
ModelCategory.CULTURAL_ADAPTATION,
"Culturally aware conversation model for diverse contexts",
True,
False,
2048,
True,
),
HFModel(
"Context-Aware Assistant",
"microsoft/GODEL-v1_1-large-seq2seq",
ModelCategory.CONTEXT_AWARE_CHAT,
"Context-aware conversational AI with memory",
True,
False,
1024,
True,
),
HFModel(
"Personality Chat Bot",
"microsoft/PersonaGPT",
ModelCategory.PERSONALITY_CHAT,
"Personality-driven conversational AI with distinct characters",
True,
False,
1024,
True,
),
HFModel(
"Role-Play Assistant",
"PygmalionAI/pygmalion-6b",
ModelCategory.ROLE_PLAY_CHAT,
"Interactive role-playing conversation model",
True,
False,
2048,
True,
),
HFModel(
"Domain Expert Chat",
"microsoft/DialoGPT-medium",
ModelCategory.DOMAIN_SPECIFIC_CHAT,
"Specialized domain conversation assistant",
True,
False,
1024,
True,
),
# Arabic Language Specialists
# NOTE(review): the display name says "GPT-J" but the repo id is
# "aragpt2-base" - confirm which one is intended.
HFModel(
"Arabic GPT-J",
"aubmindlab/aragpt2-base",
ModelCategory.BILINGUAL_CONVERSATION,
"Arabic language generation and conversation",
True,
False,
1024,
True,
),
# NOTE(review): the next two ids look like BERT-style checkpoints; verify
# they can actually serve a conversation category.
HFModel(
"Marbert Arabic Chat",
"UBC-NLP/MARBERT",
ModelCategory.BILINGUAL_CONVERSATION,
"Dialectal Arabic conversation model",
True,
False,
512,
False,
),
HFModel(
"ArabicBERT Chat",
"aubmindlab/bert-base-arabertv2",
ModelCategory.BILINGUAL_CONVERSATION,
"Modern Standard Arabic conversational understanding",
True,
False,
512,
False,
),
]
class HuggingFaceInference:
    """Async client for the Hugging Face Inference API.

    The client must be used as an async context manager, which opens and
    closes the underlying ``aiohttp`` session::

        async with HuggingFaceInference(token) as client:
            result = await client.summarization(model_id, text)

    Every task helper builds a JSON payload of the form
    ``{"inputs": ..., "parameters": {...}}`` and POSTs it to
    ``base_url + model_id``. Extra keyword arguments are forwarded inside
    ``parameters`` and override the helper's own defaults. Binary inputs
    (audio, image, video, document) are base64-encoded before sending.

    Helpers return either the decoded JSON body or the raw response bytes.
    A non-200 answer raises ``Exception`` carrying the status code and the
    response text; a 503 answer is retried a bounded number of times first.
    """

    # How many times a 503 ("model is loading") answer is retried.
    MAX_LOADING_RETRIES = 5
    # Seconds to wait when the 503 body carries no usable "estimated_time".
    DEFAULT_LOADING_WAIT = 30
    # Upper bound, in seconds, for a single wait between retries.
    MAX_LOADING_WAIT = 60

    def __init__(
        self,
        api_token: str,
        base_url: str = "https://api-inference.huggingface.co/models/",
    ):
        """Store the credentials; the HTTP session is created on ``__aenter__``.

        Args:
            api_token: Token sent as ``Authorization: Bearer <token>``.
            base_url: Prefix to which the model id is appended verbatim.
        """
        self.api_token = api_token
        self.base_url = base_url
        self.session = None  # aiohttp.ClientSession while inside ``async with``

    async def __aenter__(self):
        """Open the HTTP session and return the client itself."""
        self.session = aiohttp.ClientSession(
            headers={"Authorization": f"Bearer {self.api_token}"},
            timeout=aiohttp.ClientTimeout(total=300),  # 5 minutes timeout
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the HTTP session, if one was opened."""
        if self.session:
            await self.session.close()
            # Drop the closed session so later calls fail with a clear message.
            self.session = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _require_session(self):
        """Return the open session or raise ``RuntimeError`` if there is none."""
        if self.session is None:
            raise RuntimeError(
                "HuggingFaceInference has no open session; use it as "
                "'async with HuggingFaceInference(...) as client:'"
            )
        return self.session

    @staticmethod
    def _b64(data: bytes) -> str:
        """Return ``data`` base64-encoded as text, ready for a JSON payload."""
        return base64.b64encode(data).decode()

    async def _loading_wait(self, response) -> float:
        """Return the seconds to wait before retrying a 503 ``response``.

        Reads ``estimated_time`` from the JSON body when available, falls back
        to ``DEFAULT_LOADING_WAIT`` otherwise, and clamps the result to the
        range ``[0, MAX_LOADING_WAIT]``.
        """
        estimated = self.DEFAULT_LOADING_WAIT
        try:
            error_info = await response.json()
        except (aiohttp.ClientError, ValueError):
            # The body is not JSON (e.g. a gateway error page).
            error_info = None
        if isinstance(error_info, dict):
            estimated = error_info.get("estimated_time", estimated)
        try:
            wait_seconds = float(estimated)
        except (TypeError, ValueError):
            wait_seconds = float(self.DEFAULT_LOADING_WAIT)
        return max(0.0, min(wait_seconds, float(self.MAX_LOADING_WAIT)))

    async def _request(
        self, model_id: str, payload: Dict[str, Any], expect_json: bool = True
    ) -> Union[Dict[str, Any], bytes]:
        """POST ``payload`` to the model endpoint and return the response body.

        Args:
            model_id: Model identifier appended to ``base_url``.
            payload: JSON-serialisable request body.
            expect_json: Decode the body as JSON when true, otherwise return
                the raw bytes.

        Returns:
            The decoded JSON body or the raw response bytes.

        Raises:
            RuntimeError: If the client is used outside ``async with``.
            Exception: If the API answers with a non-200 status, including a
                503 that persists after ``MAX_LOADING_RETRIES`` retries.
        """
        session = self._require_session()
        url = f"{self.base_url}{model_id}"
        retries_left = self.MAX_LOADING_RETRIES
        try:
            while True:
                async with session.post(url, json=payload) as response:
                    if response.status == 200:
                        if expect_json:
                            return await response.json()
                        return await response.read()
                    if response.status != 503 or retries_left <= 0:
                        error_text = await response.text()
                        raise Exception(
                            f"API request failed with status {response.status}: {error_text}"
                        )
                    # 503: the model is still loading, so retry after a pause.
                    wait_seconds = await self._loading_wait(response)
                # The response has been released here, so no connection is
                # held open while sleeping.
                retries_left -= 1
                logger.info(
                    "Model %s is loading, waiting %ss", model_id, wait_seconds
                )
                await asyncio.sleep(wait_seconds)
        except Exception as e:
            logger.error("Error calling Hugging Face API for %s: %s", model_id, e)
            raise

    async def _stream_request(self, model_id: str, payload: Dict[str, Any]):
        """Yield the response of a streaming request piece by piece.

        This is an async generator: iterate it with ``async for`` and do not
        await it. Each item is one chunk of the response body decoded as
        UTF-8. The caller's ``payload`` is left untouched; ``"stream": True``
        is added to a copy.

        Raises:
            RuntimeError: If the client is used outside ``async with``.
            Exception: If the API answers with a non-200 status.
        """
        session = self._require_session()
        url = f"{self.base_url}{model_id}"
        streaming_payload = {**payload, "stream": True}
        try:
            async with session.post(url, json=streaming_payload) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(
                        f"Streaming request failed with status {response.status}: {error_text}"
                    )
                async for chunk in response.content:
                    if chunk:
                        yield chunk.decode("utf-8")
        except Exception as e:
            logger.error(
                "Error streaming from Hugging Face API for %s: %s", model_id, e
            )
            raise

    # ------------------------------------------------------------------
    # Core tasks
    # ------------------------------------------------------------------

    async def text_generation(
        self,
        model_id: str,
        prompt: str,
        max_tokens: int = 100,
        temperature: float = 0.7,
        stream: bool = False,
        **kwargs,
    ) -> Any:
        """Generate text using a text generation model.

        Args:
            model_id: Model to call.
            prompt: Input text.
            max_tokens: Sent as ``max_new_tokens``.
            temperature: Sampling temperature.
            stream: When true, return an async iterator of decoded text
                chunks instead of the complete JSON answer.
            **kwargs: Extra generation parameters.

        Returns:
            The decoded JSON body, or an async iterator of ``str`` chunks
            when ``stream`` is true.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": max_tokens,
                "temperature": temperature,
                "do_sample": True,
                **kwargs,
            },
            "options": {"use_cache": False},
        }
        if stream:
            # _stream_request is an async generator and cannot be awaited;
            # hand the iterator back to the caller instead.
            return self._stream_request(model_id, payload)
        return await self._request(model_id, payload)

    async def text_to_image(
        self,
        model_id: str,
        prompt: str,
        negative_prompt: Optional[str] = None,
        **kwargs,
    ) -> bytes:
        """Generate image from text prompt; returns the raw response bytes."""
        parameters: Dict[str, Any] = {}
        if negative_prompt:
            parameters["negative_prompt"] = negative_prompt
        parameters.update(kwargs)
        payload = {"inputs": prompt, "parameters": parameters}
        return await self._request(model_id, payload, expect_json=False)

    async def automatic_speech_recognition(
        self, model_id: str, audio_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Transcribe audio to text; the audio is sent base64-encoded."""
        payload = {"inputs": self._b64(audio_data), "parameters": kwargs}
        return await self._request(model_id, payload)

    async def text_to_speech(self, model_id: str, text: str, **kwargs) -> bytes:
        """Convert text to speech audio; returns the raw response bytes."""
        payload = {"inputs": text, "parameters": kwargs}
        return await self._request(model_id, payload, expect_json=False)

    async def image_classification(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Classify images; the image is sent base64-encoded."""
        payload = {"inputs": self._b64(image_data), "parameters": kwargs}
        return await self._request(model_id, payload)

    async def feature_extraction(
        self, model_id: str, texts: Union[str, List[str]], **kwargs
    ) -> Dict[str, Any]:
        """Extract embeddings from one text or a list of texts."""
        payload = {"inputs": texts, "parameters": kwargs}
        return await self._request(model_id, payload)

    async def translation(
        self,
        model_id: str,
        text: str,
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Translate text; language codes are sent only when provided."""
        parameters: Dict[str, Any] = {}
        if src_lang:
            parameters["src_lang"] = src_lang
        if tgt_lang:
            parameters["tgt_lang"] = tgt_lang
        parameters.update(kwargs)
        payload = {"inputs": text, "parameters": parameters}
        return await self._request(model_id, payload)

    async def summarization(
        self,
        model_id: str,
        text: str,
        max_length: int = 150,
        min_length: int = 30,
        **kwargs,
    ) -> Dict[str, Any]:
        """Summarize text within the given length bounds."""
        payload = {
            "inputs": text,
            "parameters": {
                "max_length": max_length,
                "min_length": min_length,
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def question_answering(
        self, model_id: str, question: str, context: str, **kwargs
    ) -> Dict[str, Any]:
        """Answer questions based on context."""
        payload = {
            "inputs": {"question": question, "context": context},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def zero_shot_classification(
        self, model_id: str, text: str, candidate_labels: List[str], **kwargs
    ) -> Dict[str, Any]:
        """Classify text against ``candidate_labels`` without training data."""
        payload = {
            "inputs": text,
            "parameters": {"candidate_labels": candidate_labels, **kwargs},
        }
        return await self._request(model_id, payload)

    async def conversational(
        self,
        model_id: str,
        text: str,
        conversation_history: Optional[List[Dict[str, str]]] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """Have a conversation with a model.

        Args:
            model_id: Model to call.
            text: The new user message.
            conversation_history: Earlier turns as dicts; the ``"user"`` and
                ``"bot"`` entries are collected into ``past_user_inputs`` and
                ``generated_responses`` respectively.
            **kwargs: Extra parameters.
        """
        inputs: Dict[str, Any] = {"text": text}
        if conversation_history:
            inputs["past_user_inputs"] = [
                turn["user"] for turn in conversation_history if "user" in turn
            ]
            inputs["generated_responses"] = [
                turn["bot"] for turn in conversation_history if "bot" in turn
            ]
        payload = {"inputs": inputs, "parameters": kwargs}
        return await self._request(model_id, payload)

    # ------------------------------------------------------------------
    # New methods for expanded model categories
    # ------------------------------------------------------------------

    async def text_to_video(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate video from text prompt (defaults: 5 s, 24 fps, 512x512)."""
        payload = {
            "inputs": prompt,
            "parameters": {
                "duration": 5,
                "fps": 24,
                "width": 512,
                "height": 512,
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def video_to_text(
        self, model_id: str, video_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Analyze video and generate text description."""
        payload = {
            "inputs": {"video": self._b64(video_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def code_generation(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate code from natural language prompt."""
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_length": 500,
                "temperature": 0.2,
                "language": "python",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def code_completion(
        self, model_id: str, code: str, **kwargs
    ) -> Dict[str, Any]:
        """Complete partial code."""
        payload = {
            "inputs": code,
            "parameters": {"max_length": 100, "temperature": 0.1, **kwargs},
        }
        return await self._request(model_id, payload)

    async def text_to_3d(self, model_id: str, prompt: str, **kwargs) -> Dict[str, Any]:
        """Generate 3D model from text description."""
        payload = {
            "inputs": prompt,
            "parameters": {"resolution": 64, "format": "obj", **kwargs},
        }
        return await self._request(model_id, payload)

    async def image_to_3d(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Generate 3D model from image."""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def ocr(self, model_id: str, image_data: bytes, **kwargs) -> Dict[str, Any]:
        """Perform optical character recognition on image (default language "en")."""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": {"language": "en", **kwargs},
        }
        return await self._request(model_id, payload)

    async def document_analysis(
        self, model_id: str, document_data: bytes, **kwargs
    ) -> Dict[str, Any]:
        """Analyze document structure and content."""
        payload = {
            "inputs": {"document": self._b64(document_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def vision_language(
        self, model_id: str, image_data: bytes, text: str, **kwargs
    ) -> Dict[str, Any]:
        """Process image and text together."""
        payload = {
            "inputs": {"image": self._b64(image_data), "text": text},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload)

    async def multimodal_reasoning(
        self, model_id: str, inputs: Dict[str, Any], **kwargs
    ) -> Dict[str, Any]:
        """Perform reasoning across multiple modalities; ``inputs`` is sent as given."""
        payload = {"inputs": inputs, "parameters": kwargs}
        return await self._request(model_id, payload)

    async def music_generation(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate music from text prompt."""
        payload = {
            "inputs": prompt,
            "parameters": {
                "duration": 30,
                "bpm": 120,
                "genre": "electronic",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def voice_cloning(
        self, model_id: str, text: str, voice_sample: bytes, **kwargs
    ) -> bytes:
        """Clone voice and synthesize speech; returns the raw response bytes."""
        payload = {
            "inputs": {"text": text, "voice_sample": self._b64(voice_sample)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload, expect_json=False)

    async def super_resolution(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> bytes:
        """Enhance image resolution (default scale factor 4); returns raw bytes."""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": {"scale_factor": 4, **kwargs},
        }
        return await self._request(model_id, payload, expect_json=False)

    async def background_removal(
        self, model_id: str, image_data: bytes, **kwargs
    ) -> bytes:
        """Remove background from image; returns the raw response bytes."""
        payload = {
            "inputs": {"image": self._b64(image_data)},
            "parameters": kwargs,
        }
        return await self._request(model_id, payload, expect_json=False)

    async def creative_writing(
        self, model_id: str, prompt: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate creative content."""
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_length": 1000,
                "creativity": 0.8,
                "genre": "general",
                **kwargs,
            },
        }
        return await self._request(model_id, payload)

    async def business_document(
        self, model_id: str, document_type: str, context: str, **kwargs
    ) -> Dict[str, Any]:
        """Generate business documents from a document type and context."""
        payload = {
            "inputs": f"Generate {document_type}: {context}",
            "parameters": {"format": "professional", "length": "medium", **kwargs},
        }
        return await self._request(model_id, payload)
class HuggingFaceModelManager:
"""Manager for all Hugging Face model operations"""
def __init__(self, api_token: str):
    """Build the model catalog and remember the API token.

    Args:
        api_token: Hugging Face API token kept on the instance as
            ``api_token``.
    """
    # The catalog is instantiated once per manager and exposed as ``models``.
    self.models = HuggingFaceModels()
    self.api_token = api_token
def get_models_by_category(self, category: ModelCategory) -> List[HFModel]:
    """Get all models for a specific category.

    The lookup is delegated to ``get_all_models()`` so that both methods
    share one category-to-catalog mapping. The previous hard-coded chain
    covered only eight core categories and returned an empty list for every
    other category, including ones that ``get_all_models()`` registers
    (video, code, 3D, document processing, multimodal, ...).

    Args:
        category: The category to look up.

    Returns:
        The model list registered for ``category``, or an empty list when
        the category is not part of the mapping.
    """
    return self.get_all_models().get(category, [])
def get_all_models(self) -> Dict[ModelCategory, List[HFModel]]:
    """Build the category -> model-list mapping for every registered category.

    Categories are declared in groups, each group sharing one model list from
    ``self.models``. The groups are flattened in declaration order, so the
    resulting dictionary has one key per category, inserted in that order.

    Returns:
        A new dictionary on every call; its values are the list objects held
        by ``self.models``.
    """
    cat = ModelCategory
    registry = self.models
    groups = (
        # Core AI categories
        ((cat.TEXT_GENERATION,), registry.TEXT_GENERATION_MODELS),
        ((cat.TEXT_TO_IMAGE,), registry.TEXT_TO_IMAGE_MODELS),
        ((cat.AUTOMATIC_SPEECH_RECOGNITION,), registry.ASR_MODELS),
        ((cat.TEXT_TO_SPEECH,), registry.TTS_MODELS),
        ((cat.IMAGE_CLASSIFICATION,), registry.IMAGE_CLASSIFICATION_MODELS),
        ((cat.FEATURE_EXTRACTION,), registry.FEATURE_EXTRACTION_MODELS),
        ((cat.TRANSLATION,), registry.TRANSLATION_MODELS),
        ((cat.SUMMARIZATION,), registry.SUMMARIZATION_MODELS),
        # Video and Motion
        (
            (
                cat.TEXT_TO_VIDEO,
                cat.VIDEO_GENERATION,
                cat.VIDEO_TO_TEXT,
                cat.VIDEO_CLASSIFICATION,
            ),
            registry.VIDEO_GENERATION_MODELS,
        ),
        # Code and Development
        (
            (
                cat.CODE_GENERATION,
                cat.CODE_COMPLETION,
                cat.CODE_EXPLANATION,
                cat.APP_GENERATION,
            ),
            registry.CODE_GENERATION_MODELS,
        ),
        # 3D and AR/VR
        (
            (
                cat.TEXT_TO_3D,
                cat.IMAGE_TO_3D,
                cat.THREE_D_GENERATION,
                cat.MESH_GENERATION,
            ),
            registry.THREE_D_MODELS,
        ),
        # Document Processing
        (
            (
                cat.OCR,
                cat.DOCUMENT_ANALYSIS,
                cat.HANDWRITING_RECOGNITION,
                cat.TABLE_EXTRACTION,
                cat.FORM_PROCESSING,
            ),
            registry.DOCUMENT_PROCESSING_MODELS,
        ),
        # Multimodal AI
        (
            (
                cat.VISION_LANGUAGE,
                cat.MULTIMODAL_REASONING,
                cat.VISUAL_QUESTION_ANSWERING,
                cat.MULTIMODAL_CHAT,
                cat.CROSS_MODAL_GENERATION,
            ),
            registry.MULTIMODAL_MODELS,
        ),
        # Specialized AI
        (
            (
                cat.MUSIC_GENERATION,
                cat.VOICE_CLONING,
                cat.SUPER_RESOLUTION,
                cat.FACE_RESTORATION,
                cat.IMAGE_INPAINTING,
                cat.BACKGROUND_REMOVAL,
            ),
            registry.SPECIALIZED_AI_MODELS,
        ),
        # Creative Content
        (
            (
                cat.CREATIVE_WRITING,
                cat.STORY_GENERATION,
                cat.POETRY_GENERATION,
                cat.BLOG_WRITING,
                cat.MARKETING_COPY,
            ),
            registry.CREATIVE_CONTENT_MODELS,
        ),
        # Game Development
        (
            (
                cat.GAME_ASSET_GENERATION,
                cat.CHARACTER_GENERATION,
                cat.LEVEL_GENERATION,
                cat.DIALOGUE_GENERATION,
            ),
            registry.GAME_DEVELOPMENT_MODELS,
        ),
        # Science and Research
        (
            (
                cat.PROTEIN_FOLDING,
                cat.MOLECULE_GENERATION,
                cat.SCIENTIFIC_WRITING,
                cat.RESEARCH_ASSISTANCE,
                cat.DATA_ANALYSIS,
            ),
            registry.SCIENCE_RESEARCH_MODELS,
        ),
        # Business and Productivity
        (
            (
                cat.EMAIL_GENERATION,
                cat.PRESENTATION_CREATION,
                cat.REPORT_GENERATION,
                cat.MEETING_SUMMARIZATION,
                cat.PROJECT_PLANNING,
            ),
            registry.BUSINESS_PRODUCTIVITY_MODELS,
        ),
        # AI Teacher and Education Models
        (
            (
                cat.AI_TUTORING,
                cat.EDUCATIONAL_CONTENT,
                cat.LESSON_PLANNING,
                cat.CONCEPT_EXPLANATION,
                cat.HOMEWORK_ASSISTANCE,
                cat.QUIZ_GENERATION,
                cat.CURRICULUM_DESIGN,
                cat.LEARNING_ASSESSMENT,
                cat.ADAPTIVE_LEARNING,
                cat.SUBJECT_TEACHING,
                cat.MATH_TUTORING,
                cat.SCIENCE_TUTORING,
                cat.LANGUAGE_TUTORING,
                cat.HISTORY_TUTORING,
                cat.CODING_INSTRUCTION,
                cat.EXAM_PREPARATION,
                cat.STUDY_GUIDE_CREATION,
                cat.EDUCATIONAL_GAMES,
                cat.LEARNING_ANALYTICS,
                cat.PERSONALIZED_LEARNING,
            ),
            registry.AI_TEACHER_MODELS,
        ),
        # Qwen Models
        (
            (
                cat.QWEN_REASONING,
                cat.QWEN_MATH,
                cat.QWEN_CODE,
                cat.QWEN_VISION,
                cat.QWEN_AUDIO,
            ),
            registry.QWEN_MODELS,
        ),
        # DeepSeek Models
        (
            (
                cat.DEEPSEEK_CODING,
                cat.DEEPSEEK_REASONING,
                cat.DEEPSEEK_MATH,
                cat.DEEPSEEK_RESEARCH,
            ),
            registry.DEEPSEEK_MODELS,
        ),
        # Advanced Image Processing & Manipulation
        # (split into three runs so key insertion order is unchanged)
        ((cat.IMAGE_EDITING,), registry.IMAGE_EDITING_MODELS),
        (
            (cat.FACE_SWAP, cat.FACE_ENHANCEMENT, cat.FACE_GENERATION),
            registry.FACE_SWAP_MODELS,
        ),
        (
            (
                cat.PORTRAIT_EDITING,
                cat.PHOTO_RESTORATION,
                cat.IMAGE_UPSCALING,
                cat.COLOR_CORRECTION,
                cat.ARTISTIC_FILTER,
            ),
            registry.IMAGE_EDITING_MODELS,
        ),
        # Advanced Speech & Audio
        (
            (
                cat.ADVANCED_TTS,
                cat.ADVANCED_STT,
                cat.VOICE_CONVERSION,
                cat.SPEECH_ENHANCEMENT,
                cat.AUDIO_GENERATION,
                cat.MULTILINGUAL_TTS,
                cat.MULTILINGUAL_STT,
                cat.REAL_TIME_TRANSLATION,
            ),
            registry.ADVANCED_SPEECH_MODELS,
        ),
        # Interactive Avatar & Video Generation
        (
            (
                cat.TALKING_AVATAR,
                cat.AVATAR_GENERATION,
                cat.LIP_SYNC,
                cat.FACIAL_ANIMATION,
                cat.GESTURE_GENERATION,
                cat.VIRTUAL_PRESENTER,
                cat.AI_ANCHOR,
            ),
            registry.TALKING_AVATAR_MODELS,
        ),
        # Interactive Language & Conversation
        (
            (
                cat.INTERACTIVE_CHAT,
                cat.BILINGUAL_CONVERSATION,
                cat.CULTURAL_ADAPTATION,
                cat.CONTEXT_AWARE_CHAT,
                cat.PERSONALITY_CHAT,
                cat.ROLE_PLAY_CHAT,
                cat.DOMAIN_SPECIFIC_CHAT,
            ),
            registry.INTERACTIVE_LANGUAGE_MODELS,
        ),
    )
    return {
        category: models
        for categories, models in groups
        for category in categories
    }
def get_model_by_id(self, model_id: str) -> Optional[HFModel]:
    """Look up a model by its ``model_id`` attribute.

    Scans the lists returned by ``get_all_models()`` in mapping order and
    stops at the first entry whose ``model_id`` equals the argument.

    Args:
        model_id: Identifier to search for.

    Returns:
        The first matching model, or ``None`` when nothing matches.
    """
    candidates = (
        entry
        for group in self.get_all_models().values()
        for entry in group
        if entry.model_id == model_id
    )
    return next(candidates, None)
async def call_model(self, model_id: str, category: ModelCategory, **kwargs) -> Any:
    """Route a request to the inference method that serves ``category``.

    A ``HuggingFaceInference`` client is opened with this manager's API token
    for the duration of the call. The checks below run top to bottom and the
    first match wins; all keyword arguments are forwarded unchanged to the
    selected client method.

    Args:
        model_id: Identifier of the model to call.
        category: Category used to pick the client method.
        **kwargs: Arguments passed through to the selected client method.

    Returns:
        The awaited result of the selected client method.

    Raises:
        ValueError: If no check matches ``category``.
    """
    cat = ModelCategory
    async with HuggingFaceInference(self.api_token) as hf:
        # --- Core AI categories ---
        if category == cat.TEXT_GENERATION:
            return await hf.text_generation(model_id, **kwargs)
        if category == cat.TEXT_TO_IMAGE:
            return await hf.text_to_image(model_id, **kwargs)
        if category == cat.AUTOMATIC_SPEECH_RECOGNITION:
            return await hf.automatic_speech_recognition(model_id, **kwargs)
        if category == cat.TEXT_TO_SPEECH:
            return await hf.text_to_speech(model_id, **kwargs)
        if category == cat.IMAGE_CLASSIFICATION:
            return await hf.image_classification(model_id, **kwargs)
        if category == cat.FEATURE_EXTRACTION:
            return await hf.feature_extraction(model_id, **kwargs)
        if category == cat.TRANSLATION:
            return await hf.translation(model_id, **kwargs)
        if category == cat.SUMMARIZATION:
            return await hf.summarization(model_id, **kwargs)
        if category == cat.QUESTION_ANSWERING:
            return await hf.question_answering(model_id, **kwargs)
        if category == cat.ZERO_SHOT_CLASSIFICATION:
            return await hf.zero_shot_classification(model_id, **kwargs)
        if category == cat.CONVERSATIONAL:
            return await hf.conversational(model_id, **kwargs)

        # --- Video and Motion ---
        if category in (cat.TEXT_TO_VIDEO, cat.VIDEO_GENERATION):
            return await hf.text_to_video(model_id, **kwargs)
        if category == cat.VIDEO_TO_TEXT:
            return await hf.video_to_text(model_id, **kwargs)
        if category == cat.VIDEO_CLASSIFICATION:
            # Video classification reuses the image-classification call.
            return await hf.image_classification(model_id, **kwargs)

        # --- Code and Development ---
        if category in (cat.CODE_GENERATION, cat.APP_GENERATION):
            return await hf.code_generation(model_id, **kwargs)
        if category in (cat.CODE_COMPLETION, cat.CODE_EXPLANATION):
            return await hf.code_completion(model_id, **kwargs)

        # --- 3D and AR/VR ---
        if category in (cat.TEXT_TO_3D, cat.THREE_D_GENERATION):
            return await hf.text_to_3d(model_id, **kwargs)
        if category in (cat.IMAGE_TO_3D, cat.MESH_GENERATION):
            return await hf.image_to_3d(model_id, **kwargs)

        # --- Document Processing ---
        if category == cat.OCR:
            return await hf.ocr(model_id, **kwargs)
        if category in (
            cat.DOCUMENT_ANALYSIS,
            cat.FORM_PROCESSING,
            cat.TABLE_EXTRACTION,
            cat.LAYOUT_ANALYSIS,
        ):
            return await hf.document_analysis(model_id, **kwargs)
        if category == cat.HANDWRITING_RECOGNITION:
            # Handwriting recognition reuses the OCR call.
            return await hf.ocr(model_id, **kwargs)

        # --- Multimodal AI ---
        if category in (
            cat.VISION_LANGUAGE,
            cat.VISUAL_QUESTION_ANSWERING,
            cat.IMAGE_TEXT_MATCHING,
        ):
            return await hf.vision_language(model_id, **kwargs)
        if category in (
            cat.MULTIMODAL_REASONING,
            cat.MULTIMODAL_CHAT,
            cat.CROSS_MODAL_GENERATION,
        ):
            return await hf.multimodal_reasoning(model_id, **kwargs)

        # --- Specialized AI ---
        if category == cat.MUSIC_GENERATION:
            return await hf.music_generation(model_id, **kwargs)
        if category == cat.VOICE_CLONING:
            return await hf.voice_cloning(model_id, **kwargs)
        if category == cat.SUPER_RESOLUTION:
            return await hf.super_resolution(model_id, **kwargs)
        if category in (
            cat.FACE_RESTORATION,
            cat.IMAGE_INPAINTING,
            cat.IMAGE_OUTPAINTING,
        ):
            # These restoration tasks reuse the super-resolution call.
            return await hf.super_resolution(model_id, **kwargs)
        if category == cat.BACKGROUND_REMOVAL:
            return await hf.background_removal(model_id, **kwargs)

        # --- Creative Content ---
        if category in (
            cat.CREATIVE_WRITING,
            cat.STORY_GENERATION,
            cat.POETRY_GENERATION,
            cat.SCREENPLAY_WRITING,
        ):
            return await hf.creative_writing(model_id, **kwargs)
        if category in (cat.BLOG_WRITING, cat.MARKETING_COPY):
            # Blog and marketing copy use standard text generation.
            return await hf.text_generation(model_id, **kwargs)

        # --- Game Development ---
        if category in (
            cat.CHARACTER_GENERATION,
            cat.LEVEL_GENERATION,
            cat.DIALOGUE_GENERATION,
            cat.GAME_ASSET_GENERATION,
        ):
            # Game content goes through the creative-writing call.
            return await hf.creative_writing(model_id, **kwargs)

        # --- Science and Research ---
        if category in (cat.PROTEIN_FOLDING, cat.MOLECULE_GENERATION):
            return await hf.text_generation(model_id, **kwargs)
        if category in (
            cat.SCIENTIFIC_WRITING,
            cat.RESEARCH_ASSISTANCE,
            cat.DATA_ANALYSIS,
        ):
            return await hf.text_generation(model_id, **kwargs)

        # --- Business and Productivity ---
        if category in (
            cat.EMAIL_GENERATION,
            cat.PRESENTATION_CREATION,
            cat.REPORT_GENERATION,
            cat.MEETING_SUMMARIZATION,
            cat.PROJECT_PLANNING,
        ):
            # The category's value is passed as the document type.
            return await hf.business_document(model_id, category.value, **kwargs)

        # --- AI Teacher and Education ---
        if category in (
            cat.AI_TUTORING,
            cat.EDUCATIONAL_CONTENT,
            cat.LESSON_PLANNING,
            cat.CONCEPT_EXPLANATION,
            cat.HOMEWORK_ASSISTANCE,
            cat.QUIZ_GENERATION,
            cat.CURRICULUM_DESIGN,
            cat.LEARNING_ASSESSMENT,
            cat.ADAPTIVE_LEARNING,
            cat.SUBJECT_TEACHING,
            cat.MATH_TUTORING,
            cat.SCIENCE_TUTORING,
            cat.LANGUAGE_TUTORING,
            cat.HISTORY_TUTORING,
            cat.CODING_INSTRUCTION,
            cat.EXAM_PREPARATION,
            cat.STUDY_GUIDE_CREATION,
            cat.EDUCATIONAL_GAMES,
            cat.LEARNING_ANALYTICS,
            cat.PERSONALIZED_LEARNING,
        ):
            # Educational content is produced with text generation.
            return await hf.text_generation(model_id, **kwargs)

        # --- Qwen models ---
        if category in (cat.QWEN_REASONING, cat.QWEN_MATH, cat.QWEN_CODE):
            return await hf.text_generation(model_id, **kwargs)
        if category == cat.QWEN_VISION:
            return await hf.vision_language(model_id, **kwargs)
        if category == cat.QWEN_AUDIO:
            return await hf.automatic_speech_recognition(model_id, **kwargs)

        # --- DeepSeek models ---
        if category in (
            cat.DEEPSEEK_CODING,
            cat.DEEPSEEK_REASONING,
            cat.DEEPSEEK_MATH,
            cat.DEEPSEEK_RESEARCH,
        ):
            return await hf.text_generation(model_id, **kwargs)

        # --- Advanced Image Processing & Manipulation ---
        if category in (
            cat.IMAGE_EDITING,
            cat.PORTRAIT_EDITING,
            cat.PHOTO_RESTORATION,
            cat.COLOR_CORRECTION,
            cat.ARTISTIC_FILTER,
        ):
            # Image processing goes through the text-to-image call.
            return await hf.text_to_image(model_id, **kwargs)
        if category == cat.IMAGE_UPSCALING:
            return await hf.super_resolution(model_id, **kwargs)
        if category in (cat.FACE_SWAP, cat.FACE_ENHANCEMENT, cat.FACE_GENERATION):
            # Face manipulation goes through the text-to-image call.
            return await hf.text_to_image(model_id, **kwargs)

        # --- Advanced Speech & Audio ---
        if category in (
            cat.ADVANCED_TTS,
            cat.MULTILINGUAL_TTS,
            cat.VOICE_CONVERSION,
        ):
            return await hf.text_to_speech(model_id, **kwargs)
        if category in (
            cat.ADVANCED_STT,
            cat.MULTILINGUAL_STT,
            cat.SPEECH_ENHANCEMENT,
        ):
            return await hf.automatic_speech_recognition(model_id, **kwargs)
        if category in (cat.AUDIO_GENERATION, cat.REAL_TIME_TRANSLATION):
            # Audio generation goes through the text-to-speech call.
            return await hf.text_to_speech(model_id, **kwargs)

        # --- Interactive Avatar & Video Generation ---
        if category in (
            cat.TALKING_AVATAR,
            cat.AVATAR_GENERATION,
            cat.LIP_SYNC,
            cat.FACIAL_ANIMATION,
            cat.GESTURE_GENERATION,
            cat.VIRTUAL_PRESENTER,
            cat.AI_ANCHOR,
        ):
            # Avatar output goes through the text-to-video call.
            return await hf.text_to_video(model_id, **kwargs)

        # --- Interactive Language & Conversation ---
        if category in (
            cat.INTERACTIVE_CHAT,
            cat.BILINGUAL_CONVERSATION,
            cat.CULTURAL_ADAPTATION,
            cat.CONTEXT_AWARE_CHAT,
            cat.PERSONALITY_CHAT,
            cat.ROLE_PLAY_CHAT,
            cat.DOMAIN_SPECIFIC_CHAT,
        ):
            return await hf.conversational(model_id, **kwargs)

        raise ValueError(f"Unsupported model category: {category}")