Spaces:
Paused
Paused
| """ | |
| Hugging Face Models Integration for OpenManus AI Agent | |
| Comprehensive integration with Hugging Face Inference API for all model categories | |
| """ | |
| import asyncio | |
| import base64 | |
| import io | |
| import json | |
| import logging | |
| from dataclasses import dataclass | |
| from enum import Enum | |
| from typing import Any, Dict, List, Optional, Union | |
| import aiohttp | |
| import PIL.Image | |
| from pydantic import BaseModel | |
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class ModelCategory(Enum):
    """Categories of Hugging Face models available.

    Each member's value is a lowercase, hyphen-separated string slug.
    Members are grouped by domain below; the grouping is expressed only
    through comments and has no effect at runtime.

    NOTE(review): the first group looks like standard Hugging Face task
    names, while most later groups appear to be project-specific labels.
    Confirm before passing a value to the Hub as a task identifier.
    """

    # Core AI categories
    TEXT_GENERATION = "text-generation"
    TEXT_TO_IMAGE = "text-to-image"
    IMAGE_TO_TEXT = "image-to-text"
    AUTOMATIC_SPEECH_RECOGNITION = "automatic-speech-recognition"
    TEXT_TO_SPEECH = "text-to-speech"
    IMAGE_CLASSIFICATION = "image-classification"
    OBJECT_DETECTION = "object-detection"
    FEATURE_EXTRACTION = "feature-extraction"
    SENTENCE_SIMILARITY = "sentence-similarity"
    TRANSLATION = "translation"
    SUMMARIZATION = "summarization"
    QUESTION_ANSWERING = "question-answering"
    FILL_MASK = "fill-mask"
    TOKEN_CLASSIFICATION = "token-classification"
    ZERO_SHOT_CLASSIFICATION = "zero-shot-classification"
    AUDIO_CLASSIFICATION = "audio-classification"
    CONVERSATIONAL = "conversational"
    # Video and Motion
    TEXT_TO_VIDEO = "text-to-video"
    VIDEO_TO_TEXT = "video-to-text"
    VIDEO_CLASSIFICATION = "video-classification"
    VIDEO_GENERATION = "video-generation"
    MOTION_GENERATION = "motion-generation"
    DEEPFAKE_DETECTION = "deepfake-detection"
    # Code and Development
    CODE_GENERATION = "code-generation"
    CODE_COMPLETION = "code-completion"
    CODE_EXPLANATION = "code-explanation"
    CODE_TRANSLATION = "code-translation"
    CODE_REVIEW = "code-review"
    APP_GENERATION = "app-generation"
    API_GENERATION = "api-generation"
    DATABASE_GENERATION = "database-generation"
    # 3D and AR/VR
    TEXT_TO_3D = "text-to-3d"
    IMAGE_TO_3D = "image-to-3d"
    # Member name is spelled out because an identifier cannot start with a digit.
    THREE_D_GENERATION = "3d-generation"
    MESH_GENERATION = "mesh-generation"
    TEXTURE_GENERATION = "texture-generation"
    AR_CONTENT = "ar-content"
    VR_ENVIRONMENT = "vr-environment"
    # Document Processing
    OCR = "ocr"
    DOCUMENT_ANALYSIS = "document-analysis"
    PDF_PROCESSING = "pdf-processing"
    LAYOUT_ANALYSIS = "layout-analysis"
    TABLE_EXTRACTION = "table-extraction"
    HANDWRITING_RECOGNITION = "handwriting-recognition"
    FORM_PROCESSING = "form-processing"
    # Multimodal AI
    VISION_LANGUAGE = "vision-language"
    MULTIMODAL_REASONING = "multimodal-reasoning"
    CROSS_MODAL_GENERATION = "cross-modal-generation"
    VISUAL_QUESTION_ANSWERING = "visual-question-answering"
    IMAGE_TEXT_MATCHING = "image-text-matching"
    MULTIMODAL_CHAT = "multimodal-chat"
    # Specialized AI
    MUSIC_GENERATION = "music-generation"
    VOICE_CLONING = "voice-cloning"
    STYLE_TRANSFER = "style-transfer"
    SUPER_RESOLUTION = "super-resolution"
    IMAGE_INPAINTING = "image-inpainting"
    IMAGE_OUTPAINTING = "image-outpainting"
    BACKGROUND_REMOVAL = "background-removal"
    FACE_RESTORATION = "face-restoration"
    # Content Creation
    CREATIVE_WRITING = "creative-writing"
    STORY_GENERATION = "story-generation"
    SCREENPLAY_WRITING = "screenplay-writing"
    POETRY_GENERATION = "poetry-generation"
    BLOG_WRITING = "blog-writing"
    MARKETING_COPY = "marketing-copy"
    # Game Development
    GAME_ASSET_GENERATION = "game-asset-generation"
    CHARACTER_GENERATION = "character-generation"
    LEVEL_GENERATION = "level-generation"
    DIALOGUE_GENERATION = "dialogue-generation"
    # Science and Research
    PROTEIN_FOLDING = "protein-folding"
    MOLECULE_GENERATION = "molecule-generation"
    SCIENTIFIC_WRITING = "scientific-writing"
    RESEARCH_ASSISTANCE = "research-assistance"
    DATA_ANALYSIS = "data-analysis"
    # Business and Productivity
    EMAIL_GENERATION = "email-generation"
    PRESENTATION_CREATION = "presentation-creation"
    REPORT_GENERATION = "report-generation"
    MEETING_SUMMARIZATION = "meeting-summarization"
    PROJECT_PLANNING = "project-planning"
    # AI Teacher and Education
    AI_TUTORING = "ai-tutoring"
    EDUCATIONAL_CONTENT = "educational-content"
    LESSON_PLANNING = "lesson-planning"
    CONCEPT_EXPLANATION = "concept-explanation"
    HOMEWORK_ASSISTANCE = "homework-assistance"
    QUIZ_GENERATION = "quiz-generation"
    CURRICULUM_DESIGN = "curriculum-design"
    LEARNING_ASSESSMENT = "learning-assessment"
    ADAPTIVE_LEARNING = "adaptive-learning"
    SUBJECT_TEACHING = "subject-teaching"
    MATH_TUTORING = "math-tutoring"
    SCIENCE_TUTORING = "science-tutoring"
    LANGUAGE_TUTORING = "language-tutoring"
    HISTORY_TUTORING = "history-tutoring"
    CODING_INSTRUCTION = "coding-instruction"
    EXAM_PREPARATION = "exam-preparation"
    STUDY_GUIDE_CREATION = "study-guide-creation"
    EDUCATIONAL_GAMES = "educational-games"
    LEARNING_ANALYTICS = "learning-analytics"
    PERSONALIZED_LEARNING = "personalized-learning"
    # Advanced Image Processing & Manipulation
    IMAGE_EDITING = "image-editing"
    FACE_SWAP = "face-swap"
    FACE_ENHANCEMENT = "face-enhancement"
    FACE_GENERATION = "face-generation"
    PORTRAIT_EDITING = "portrait-editing"
    PHOTO_RESTORATION = "photo-restoration"
    IMAGE_UPSCALING = "image-upscaling"
    COLOR_CORRECTION = "color-correction"
    ARTISTIC_FILTER = "artistic-filter"
    # Advanced Speech & Audio
    ADVANCED_TTS = "advanced-tts"
    ADVANCED_STT = "advanced-stt"
    VOICE_CONVERSION = "voice-conversion"
    SPEECH_ENHANCEMENT = "speech-enhancement"
    AUDIO_GENERATION = "audio-generation"
    MULTILINGUAL_TTS = "multilingual-tts"
    MULTILINGUAL_STT = "multilingual-stt"
    REAL_TIME_TRANSLATION = "real-time-translation"
    # Interactive Avatar & Video Generation
    TALKING_AVATAR = "talking-avatar"
    AVATAR_GENERATION = "avatar-generation"
    LIP_SYNC = "lip-sync"
    FACIAL_ANIMATION = "facial-animation"
    GESTURE_GENERATION = "gesture-generation"
    VIRTUAL_PRESENTER = "virtual-presenter"
    AI_ANCHOR = "ai-anchor"
    # Interactive Language & Conversation
    INTERACTIVE_CHAT = "interactive-chat"
    BILINGUAL_CONVERSATION = "bilingual-conversation"
    CULTURAL_ADAPTATION = "cultural-adaptation"
    CONTEXT_AWARE_CHAT = "context-aware-chat"
    PERSONALITY_CHAT = "personality-chat"
    ROLE_PLAY_CHAT = "role-play-chat"
    DOMAIN_SPECIFIC_CHAT = "domain-specific-chat"
    # Qwen Specialized Categories
    QWEN_REASONING = "qwen-reasoning"
    QWEN_MATH = "qwen-math"
    QWEN_CODE = "qwen-code"
    QWEN_VISION = "qwen-vision"
    QWEN_AUDIO = "qwen-audio"
    # DeepSeek Specialized Categories
    DEEPSEEK_CODING = "deepseek-coding"
    DEEPSEEK_REASONING = "deepseek-reasoning"
    DEEPSEEK_MATH = "deepseek-math"
    DEEPSEEK_RESEARCH = "deepseek-research"
@dataclass
class HFModel:
    """Hugging Face model definition.

    A plain record describing one catalogue entry. The ``@dataclass``
    decorator generates ``__init__`` (plus ``__repr__`` and ``__eq__``)
    from the annotated fields in declaration order; without it the
    positional ``HFModel(...)`` calls used to build the model catalogue
    raise ``TypeError`` because the class would accept no arguments.

    The first four fields are required; the remaining four are optional
    and may be passed positionally or by keyword.
    """

    # Human-readable display label, e.g. "Whisper Large v3".
    name: str
    # Repository identifier string, typically in "owner/name" form.
    model_id: str
    # Category this entry is filed under.
    category: ModelCategory
    # Short free-text summary of the model.
    description: str
    # NOTE(review): presumably marks models callable through the hosted
    # inference endpoint -- confirm against the client code.
    endpoint_compatible: bool = False
    # NOTE(review): presumably marks models that need an access token -- confirm.
    requires_auth: bool = False
    # Optional token limit; None when the entry does not specify one.
    # NOTE(review): context window vs. generation cap is not visible here.
    max_tokens: Optional[int] = None
    # NOTE(review): presumably marks models that can stream responses -- confirm.
    supports_streaming: bool = False
| class HuggingFaceModels: | |
| """Comprehensive collection of Hugging Face models for all categories""" | |
# Text-generation catalogue. Every entry sets an explicit max_tokens and
# enables streaming; only the Llama entry is flagged as requiring auth.
TEXT_GENERATION_MODELS = [
    HFModel(
        name="MiniMax-M2",
        model_id="MiniMaxAI/MiniMax-M2",
        category=ModelCategory.TEXT_GENERATION,
        description="Latest high-performance text generation model",
        endpoint_compatible=True,
        requires_auth=False,
        max_tokens=4096,
        supports_streaming=True,
    ),
    HFModel(
        name="Kimi Linear 48B",
        model_id="moonshotai/Kimi-Linear-48B-A3B-Instruct",
        category=ModelCategory.TEXT_GENERATION,
        description="Large instruction-tuned model with linear attention",
        endpoint_compatible=True,
        requires_auth=False,
        max_tokens=8192,
        supports_streaming=True,
    ),
    HFModel(
        name="GPT-OSS 20B",
        model_id="openai/gpt-oss-20b",
        category=ModelCategory.TEXT_GENERATION,
        description="Open-source GPT model by OpenAI",
        endpoint_compatible=True,
        requires_auth=False,
        max_tokens=4096,
        supports_streaming=True,
    ),
    HFModel(
        name="GPT-OSS 120B",
        model_id="openai/gpt-oss-120b",
        category=ModelCategory.TEXT_GENERATION,
        description="Large open-source GPT model",
        endpoint_compatible=True,
        requires_auth=False,
        max_tokens=4096,
        supports_streaming=True,
    ),
    HFModel(
        name="Granite 4.0 1B",
        model_id="ibm-granite/granite-4.0-1b",
        category=ModelCategory.TEXT_GENERATION,
        description="IBM's enterprise-grade small language model",
        endpoint_compatible=True,
        requires_auth=False,
        max_tokens=2048,
        supports_streaming=True,
    ),
    HFModel(
        name="GLM-4.6",
        model_id="zai-org/GLM-4.6",
        category=ModelCategory.TEXT_GENERATION,
        description="Multilingual conversational model",
        endpoint_compatible=True,
        requires_auth=False,
        max_tokens=4096,
        supports_streaming=True,
    ),
    HFModel(
        name="Llama 3.1 8B Instruct",
        model_id="meta-llama/Llama-3.1-8B-Instruct",
        category=ModelCategory.TEXT_GENERATION,
        description="Meta's instruction-tuned Llama model",
        endpoint_compatible=True,
        requires_auth=True,
        max_tokens=8192,
        supports_streaming=True,
    ),
    HFModel(
        name="Tongyi DeepResearch 30B",
        model_id="Alibaba-NLP/Tongyi-DeepResearch-30B-A3B",
        category=ModelCategory.TEXT_GENERATION,
        description="Alibaba's research-focused large language model",
        endpoint_compatible=True,
        requires_auth=False,
        max_tokens=4096,
        supports_streaming=True,
    ),
    HFModel(
        name="EuroLLM 9B",
        model_id="utter-project/EuroLLM-9B",
        category=ModelCategory.TEXT_GENERATION,
        description="European multilingual language model",
        endpoint_compatible=True,
        requires_auth=False,
        max_tokens=4096,
        supports_streaming=True,
    ),
]
# Text-to-image catalogue. max_tokens and supports_streaming are left at
# their HFModel defaults for every entry.
TEXT_TO_IMAGE_MODELS = [
    HFModel(
        name="FIBO",
        model_id="briaai/FIBO",
        category=ModelCategory.TEXT_TO_IMAGE,
        description="Advanced text-to-image generation model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="FLUX.1 Dev",
        model_id="black-forest-labs/FLUX.1-dev",
        category=ModelCategory.TEXT_TO_IMAGE,
        description="State-of-the-art image generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="FLUX.1 Schnell",
        model_id="black-forest-labs/FLUX.1-schnell",
        category=ModelCategory.TEXT_TO_IMAGE,
        description="Fast high-quality image generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Qwen Image",
        model_id="Qwen/Qwen-Image",
        category=ModelCategory.TEXT_TO_IMAGE,
        description="Multilingual text-to-image model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Stable Diffusion XL",
        model_id="stabilityai/stable-diffusion-xl-base-1.0",
        category=ModelCategory.TEXT_TO_IMAGE,
        description="Popular high-resolution image generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Stable Diffusion 3.5 Large",
        model_id="stabilityai/stable-diffusion-3.5-large",
        category=ModelCategory.TEXT_TO_IMAGE,
        description="Latest Stable Diffusion model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="HunyuanImage 3.0",
        model_id="tencent/HunyuanImage-3.0",
        category=ModelCategory.TEXT_TO_IMAGE,
        description="Tencent's advanced image generation model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Nitro-E",
        model_id="amd/Nitro-E",
        category=ModelCategory.TEXT_TO_IMAGE,
        description="AMD's efficient image generation model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Qwen Image Lightning",
        model_id="lightx2v/Qwen-Image-Lightning",
        category=ModelCategory.TEXT_TO_IMAGE,
        description="Fast distilled image generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Speech-to-text catalogue; all entries are filed under
# AUTOMATIC_SPEECH_RECOGNITION, including the speaker-diarization entry.
ASR_MODELS = [
    HFModel(
        name="Whisper Large v3",
        model_id="openai/whisper-large-v3",
        category=ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
        description="OpenAI's best multilingual speech recognition",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Whisper Large v3 Turbo",
        model_id="openai/whisper-large-v3-turbo",
        category=ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
        description="Faster version of Whisper Large v3",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Parakeet TDT 0.6B v3",
        model_id="nvidia/parakeet-tdt-0.6b-v3",
        category=ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
        description="NVIDIA's multilingual ASR model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Canary Qwen 2.5B",
        model_id="nvidia/canary-qwen-2.5b",
        category=ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
        description="NVIDIA's advanced ASR with Qwen integration",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Canary 1B v2",
        model_id="nvidia/canary-1b-v2",
        category=ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
        description="Compact multilingual ASR model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Whisper Small",
        model_id="openai/whisper-small",
        category=ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
        description="Lightweight multilingual ASR",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Speaker Diarization 3.1",
        model_id="pyannote/speaker-diarization-3.1",
        category=ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
        description="Advanced speaker identification and diarization",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Speech-synthesis catalogue; every entry is filed under TEXT_TO_SPEECH.
TTS_MODELS = [
    HFModel(
        name="SoulX Podcast 1.7B",
        model_id="Soul-AILab/SoulX-Podcast-1.7B",
        category=ModelCategory.TEXT_TO_SPEECH,
        description="High-quality podcast-style speech synthesis",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="NeuTTS Air",
        model_id="neuphonic/neutts-air",
        category=ModelCategory.TEXT_TO_SPEECH,
        description="Advanced neural text-to-speech",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Kokoro 82M",
        model_id="hexgrad/Kokoro-82M",
        category=ModelCategory.TEXT_TO_SPEECH,
        description="Lightweight high-quality TTS",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Kani TTS 400M EN",
        model_id="nineninesix/kani-tts-400m-en",
        category=ModelCategory.TEXT_TO_SPEECH,
        description="English-focused text-to-speech model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="XTTS v2",
        model_id="coqui/XTTS-v2",
        category=ModelCategory.TEXT_TO_SPEECH,
        description="Zero-shot voice cloning TTS",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Chatterbox",
        model_id="ResembleAI/chatterbox",
        category=ModelCategory.TEXT_TO_SPEECH,
        description="Multilingual voice cloning",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="VibeVoice 1.5B",
        model_id="microsoft/VibeVoice-1.5B",
        category=ModelCategory.TEXT_TO_SPEECH,
        description="Microsoft's advanced TTS model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="OpenAudio S1 Mini",
        model_id="fishaudio/openaudio-s1-mini",
        category=ModelCategory.TEXT_TO_SPEECH,
        description="Compact multilingual TTS",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Image-classification catalogue: general-purpose classifiers alongside
# content-safety, emotion, and generated-image detectors.
IMAGE_CLASSIFICATION_MODELS = [
    HFModel(
        name="NSFW Image Detection",
        model_id="Falconsai/nsfw_image_detection",
        category=ModelCategory.IMAGE_CLASSIFICATION,
        description="Content safety image classification",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="ViT Base Patch16",
        model_id="google/vit-base-patch16-224",
        category=ModelCategory.IMAGE_CLASSIFICATION,
        description="Google's Vision Transformer",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Deepfake Detection",
        model_id="dima806/deepfake_vs_real_image_detection",
        category=ModelCategory.IMAGE_CLASSIFICATION,
        description="Detect AI-generated vs real images",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Facial Emotions Detection",
        model_id="dima806/facial_emotions_image_detection",
        category=ModelCategory.IMAGE_CLASSIFICATION,
        description="Recognize facial emotions",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="SDXL Detector",
        model_id="Organika/sdxl-detector",
        category=ModelCategory.IMAGE_CLASSIFICATION,
        description="Detect Stable Diffusion XL generated images",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="ViT NSFW Detector",
        model_id="AdamCodd/vit-base-nsfw-detector",
        category=ModelCategory.IMAGE_CLASSIFICATION,
        description="NSFW content detection with ViT",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="ResNet 101",
        model_id="microsoft/resnet-101",
        category=ModelCategory.IMAGE_CLASSIFICATION,
        description="Microsoft's ResNet for classification",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Additional categories start here: embedding / feature-extraction models.
FEATURE_EXTRACTION_MODELS = [
    HFModel(
        name="Sentence Transformers All MiniLM",
        model_id="sentence-transformers/all-MiniLM-L6-v2",
        category=ModelCategory.FEATURE_EXTRACTION,
        description="Lightweight sentence embeddings",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="BGE Large EN",
        model_id="BAAI/bge-large-en-v1.5",
        category=ModelCategory.FEATURE_EXTRACTION,
        description="High-quality English embeddings",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="E5 Large v2",
        model_id="intfloat/e5-large-v2",
        category=ModelCategory.FEATURE_EXTRACTION,
        description="Multilingual text embeddings",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Machine-translation catalogue; every entry is filed under TRANSLATION.
TRANSLATION_MODELS = [
    HFModel(
        name="M2M100 1.2B",
        model_id="facebook/m2m100_1.2B",
        category=ModelCategory.TRANSLATION,
        description="Multilingual machine translation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="NLLB 200 3.3B",
        model_id="facebook/nllb-200-3.3B",
        category=ModelCategory.TRANSLATION,
        description="No Language Left Behind translation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="mBART Large 50",
        model_id="facebook/mbart-large-50-many-to-many-mmt",
        category=ModelCategory.TRANSLATION,
        description="Multilingual BART for translation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Summarization catalogue; every entry is filed under SUMMARIZATION.
SUMMARIZATION_MODELS = [
    HFModel(
        name="PEGASUS XSum",
        model_id="google/pegasus-xsum",
        category=ModelCategory.SUMMARIZATION,
        description="Abstractive summarization model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="BART Large CNN",
        model_id="facebook/bart-large-cnn",
        category=ModelCategory.SUMMARIZATION,
        description="CNN/DailyMail summarization",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    # This id has no "owner/" prefix, unlike most other entries.
    HFModel(
        name="T5 Base",
        model_id="t5-base",
        category=ModelCategory.SUMMARIZATION,
        description="Text-to-Text Transfer Transformer",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Video catalogue. Despite the list name it mixes generation entries with
# video-to-text and video-classification entries.
VIDEO_GENERATION_MODELS = [
    # NOTE(review): filed under TEXT_TO_VIDEO although the description says
    # image-to-video; ModelCategory has no image-to-video member.
    HFModel(
        name="Stable Video Diffusion",
        model_id="stabilityai/stable-video-diffusion-img2vid",
        category=ModelCategory.TEXT_TO_VIDEO,
        description="Image-to-video generation model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="AnimateDiff",
        model_id="guoyww/animatediff",
        category=ModelCategory.VIDEO_GENERATION,
        description="Text-to-video animation generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="VideoCrafter",
        model_id="videogen/VideoCrafter",
        category=ModelCategory.TEXT_TO_VIDEO,
        description="High-quality text-to-video generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Video ChatGPT",
        model_id="mbzuai-oryx/Video-ChatGPT-7B",
        category=ModelCategory.VIDEO_TO_TEXT,
        description="Video understanding and description",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Video-BLIP",
        model_id="salesforce/video-blip-opt-2.7b",
        category=ModelCategory.VIDEO_CLASSIFICATION,
        description="Video content analysis and classification",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Code-model catalogue: CODE_GENERATION entries first, then two
# CODE_COMPLETION entries. Only CodeLlama is flagged as requiring auth.
CODE_GENERATION_MODELS = [
    HFModel(
        name="CodeLlama 34B Instruct",
        model_id="codellama/CodeLlama-34b-Instruct-hf",
        category=ModelCategory.CODE_GENERATION,
        description="Large instruction-tuned code generation model",
        endpoint_compatible=True,
        requires_auth=True,
    ),
    HFModel(
        name="StarCoder2 15B",
        model_id="bigcode/starcoder2-15b",
        category=ModelCategory.CODE_GENERATION,
        description="Advanced code generation and completion",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    # NOTE(review): the display name says "V2" but the repo id is the
    # 6.7b-instruct checkpoint -- confirm which model is intended.
    HFModel(
        name="DeepSeek Coder V2",
        model_id="deepseek-ai/deepseek-coder-6.7b-instruct",
        category=ModelCategory.CODE_GENERATION,
        description="Specialized coding assistant",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="WizardCoder 34B",
        model_id="WizardLM/WizardCoder-Python-34B-V1.0",
        category=ModelCategory.CODE_GENERATION,
        description="Python-focused code generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Phind CodeLlama",
        model_id="Phind/Phind-CodeLlama-34B-v2",
        category=ModelCategory.CODE_GENERATION,
        description="Optimized for code explanation and debugging",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Code T5+",
        model_id="Salesforce/codet5p-770m",
        category=ModelCategory.CODE_COMPLETION,
        description="Code understanding and generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="InCoder",
        model_id="facebook/incoder-6B",
        category=ModelCategory.CODE_COMPLETION,
        description="Bidirectional code generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# 3D content catalogue (text-to-3D, image-to-3D, generic 3D, and mesh).
# NOTE(review): verify these repo ids exist on the Hub before relying on them.
THREE_D_MODELS = [
    HFModel(
        name="Shap-E",
        model_id="openai/shap-e",
        category=ModelCategory.TEXT_TO_3D,
        description="Text-to-3D shape generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Point-E",
        model_id="openai/point-e",
        category=ModelCategory.TEXT_TO_3D,
        description="Text-to-3D point cloud generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="DreamFusion",
        model_id="google/dreamfusion",
        category=ModelCategory.IMAGE_TO_3D,
        description="Image-to-3D mesh generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Magic3D",
        model_id="nvidia/magic3d",
        category=ModelCategory.THREE_D_GENERATION,
        description="High-quality 3D content creation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="GET3D",
        model_id="nvidia/get3d",
        category=ModelCategory.MESH_GENERATION,
        description="3D mesh generation from text",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Document-understanding catalogue: OCR, handwriting, layout/document
# analysis, table extraction, and form processing.
DOCUMENT_PROCESSING_MODELS = [
    HFModel(
        name="TrOCR Large",
        model_id="microsoft/trocr-large-printed",
        category=ModelCategory.OCR,
        description="Transformer-based OCR for printed text",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="TrOCR Handwritten",
        model_id="microsoft/trocr-large-handwritten",
        category=ModelCategory.HANDWRITING_RECOGNITION,
        description="Handwritten text recognition",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="LayoutLMv3",
        model_id="microsoft/layoutlmv3-large",
        category=ModelCategory.DOCUMENT_ANALYSIS,
        description="Document layout analysis and understanding",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Donut",
        model_id="naver-clova-ix/donut-base",
        category=ModelCategory.DOCUMENT_ANALYSIS,
        description="OCR-free document understanding",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="TableTransformer",
        model_id="microsoft/table-transformer-structure-recognition",
        category=ModelCategory.TABLE_EXTRACTION,
        description="Table structure recognition",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="FormNet",
        model_id="microsoft/formnet",
        category=ModelCategory.FORM_PROCESSING,
        description="Form understanding and processing",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Vision-language / multimodal catalogue. Only the GPT-4V entry is
# flagged as requiring auth.
MULTIMODAL_MODELS = [
    HFModel(
        name="BLIP-2",
        model_id="Salesforce/blip2-opt-2.7b",
        category=ModelCategory.VISION_LANGUAGE,
        description="Vision-language understanding and generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="InstructBLIP",
        model_id="Salesforce/instructblip-vicuna-7b",
        category=ModelCategory.MULTIMODAL_REASONING,
        description="Instruction-following multimodal model",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="LLaVA",
        model_id="liuhaotian/llava-v1.5-7b",
        category=ModelCategory.VISUAL_QUESTION_ANSWERING,
        description="Large Language and Vision Assistant",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    # NOTE(review): this id looks like a hosted-API model name rather than
    # a Hub repository -- verify it resolves.
    HFModel(
        name="GPT-4V",
        model_id="openai/gpt-4-vision-preview",
        category=ModelCategory.MULTIMODAL_CHAT,
        description="Advanced multimodal conversational AI",
        endpoint_compatible=True,
        requires_auth=True,
    ),
    HFModel(
        name="Flamingo",
        model_id="deepmind/flamingo-9b",
        category=ModelCategory.CROSS_MODAL_GENERATION,
        description="Few-shot learning for vision and language",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Specialised media catalogue: music generation, super-resolution, face
# restoration, inpainting, background removal, and voice cloning.
SPECIALIZED_AI_MODELS = [
    HFModel(
        name="MusicGen",
        model_id="facebook/musicgen-medium",
        category=ModelCategory.MUSIC_GENERATION,
        description="Text-to-music generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="AudioCraft",
        model_id="facebook/audiocraft_musicgen_melody",
        category=ModelCategory.MUSIC_GENERATION,
        description="Melody-conditioned music generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Real-ESRGAN",
        model_id="xinntao/realesrgan-x4plus",
        category=ModelCategory.SUPER_RESOLUTION,
        description="Image super-resolution",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="GFPGAN",
        model_id="TencentARC/GFPGAN",
        category=ModelCategory.FACE_RESTORATION,
        description="Face restoration and enhancement",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="LaMa",
        model_id="advimman/lama",
        category=ModelCategory.IMAGE_INPAINTING,
        description="Large Mask Inpainting",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Background Remover",
        model_id="briaai/RMBG-1.4",
        category=ModelCategory.BACKGROUND_REMOVAL,
        description="Automatic background removal",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    # Same repo id as the "XTTS v2" entry in TTS_MODELS, filed here under
    # VOICE_CLONING.
    HFModel(
        name="Voice Cloner",
        model_id="coqui/XTTS-v2",
        category=ModelCategory.VOICE_CLONING,
        description="Multilingual voice cloning",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Creative-writing catalogue. Only the GPT-3.5 entry is flagged as
# requiring auth.
# NOTE(review): several ids here (e.g. "gpt2-poetry") do not look like
# regular "owner/name" Hub repositories -- verify they resolve.
CREATIVE_CONTENT_MODELS = [
    HFModel(
        name="GPT-3.5 Creative",
        model_id="openai/gpt-3.5-turbo-instruct",
        category=ModelCategory.CREATIVE_WRITING,
        description="Creative writing and storytelling",
        endpoint_compatible=True,
        requires_auth=True,
    ),
    HFModel(
        name="Novel AI",
        model_id="novelai/genji-python-6b",
        category=ModelCategory.STORY_GENERATION,
        description="Interactive story generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Poet Assistant",
        model_id="gpt2-poetry",
        category=ModelCategory.POETRY_GENERATION,
        description="Poetry generation and analysis",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Blog Writer",
        model_id="google/flan-t5-large",
        category=ModelCategory.BLOG_WRITING,
        description="Blog content creation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Marketing Copy AI",
        model_id="microsoft/DialoGPT-large",
        category=ModelCategory.MARKETING_COPY,
        description="Marketing content generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Game-development catalogue: characters, levels, dialogue, and assets.
# NOTE(review): these repo ids look like placeholders -- verify they exist
# on the Hub before calling them.
GAME_DEVELOPMENT_MODELS = [
    HFModel(
        name="Character AI",
        model_id="character-ai/character-generator",
        category=ModelCategory.CHARACTER_GENERATION,
        description="Game character generation and design",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Level Designer",
        model_id="unity/level-generator",
        category=ModelCategory.LEVEL_GENERATION,
        description="Game level and environment generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Dialogue Writer",
        model_id="bioware/dialogue-generator",
        category=ModelCategory.DIALOGUE_GENERATION,
        description="Game dialogue and narrative generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Asset Creator",
        model_id="epic/asset-generator",
        category=ModelCategory.GAME_ASSET_GENERATION,
        description="Game asset and texture generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Science and research catalogue: protein folding, chemistry, scientific
# text, research assistance, and data analysis.
# NOTE(review): some ids (e.g. "microsoft/data-copilot") may be
# placeholders -- verify they exist on the Hub.
SCIENCE_RESEARCH_MODELS = [
    HFModel(
        name="AlphaFold",
        model_id="deepmind/alphafold2",
        category=ModelCategory.PROTEIN_FOLDING,
        description="Protein structure prediction",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="ChemBERTa",
        model_id="DeepChem/ChemBERTa-77M-MLM",
        category=ModelCategory.MOLECULE_GENERATION,
        description="Chemical compound analysis",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="SciBERT",
        model_id="allenai/scibert_scivocab_uncased",
        category=ModelCategory.SCIENTIFIC_WRITING,
        description="Scientific text understanding",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Research Assistant",
        model_id="microsoft/specter2",
        category=ModelCategory.RESEARCH_ASSISTANCE,
        description="Research paper analysis and recommendations",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Data Analyst",
        model_id="microsoft/data-copilot",
        category=ModelCategory.DATA_ANALYSIS,
        description="Automated data analysis and insights",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
# Business and productivity catalogue: email, presentations, reports,
# meeting summaries, and project planning.
# NOTE(review): these repo ids look like placeholders -- verify they exist
# on the Hub before calling them.
BUSINESS_PRODUCTIVITY_MODELS = [
    HFModel(
        name="Email Assistant",
        model_id="microsoft/email-generator",
        category=ModelCategory.EMAIL_GENERATION,
        description="Professional email composition",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Presentation AI",
        model_id="gamma/presentation-generator",
        category=ModelCategory.PRESENTATION_CREATION,
        description="Automated presentation creation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Report Writer",
        model_id="openai/report-generator",
        category=ModelCategory.REPORT_GENERATION,
        description="Business report generation",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Meeting Summarizer",
        model_id="microsoft/meeting-summarizer",
        category=ModelCategory.MEETING_SUMMARIZATION,
        description="Meeting notes and action items",
        endpoint_compatible=True,
        requires_auth=False,
    ),
    HFModel(
        name="Project Planner",
        model_id="atlassian/project-ai",
        category=ModelCategory.PROJECT_PLANNING,
        description="Project planning and management",
        endpoint_compatible=True,
        requires_auth=False,
    ),
]
| # AI Teacher Models - Best-in-Class Educational AI System | |
| AI_TEACHER_MODELS = [ | |
| # Primary AI Tutoring Models - Interactive & Conversational | |
| HFModel( | |
| "AI Tutor Interactive", | |
| "microsoft/DialoGPT-medium", | |
| ModelCategory.AI_TUTORING, | |
| "Interactive AI tutor for conversational learning with dialogue management", | |
| True, | |
| False, | |
| 2048, | |
| True, | |
| ), | |
| HFModel( | |
| "Goal-Oriented Tutor", | |
| "microsoft/GODEL-v1_1-large-seq2seq", | |
| ModelCategory.AI_TUTORING, | |
| "Goal-oriented conversational AI for personalized tutoring sessions", | |
| True, | |
| False, | |
| 2048, | |
| True, | |
| ), | |
| HFModel( | |
| "Advanced Instruction Tutor", | |
| "google/flan-t5-large", | |
| ModelCategory.AI_TUTORING, | |
| "Advanced instruction-following AI tutor for complex educational tasks", | |
| True, | |
| False, | |
| 2048, | |
| True, | |
| ), | |
| # Educational Content Generation - Creative & Comprehensive | |
| HFModel( | |
| "Educational Content Creator Pro", | |
| "facebook/bart-large", | |
| ModelCategory.EDUCATIONAL_CONTENT, | |
| "Professional educational content generation for all learning levels", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Multilingual Education AI", | |
| "bigscience/bloom-560m", | |
| ModelCategory.EDUCATIONAL_CONTENT, | |
| "Global multilingual educational content for diverse learners", | |
| True, | |
| False, | |
| 2048, | |
| True, | |
| ), | |
| HFModel( | |
| "Academic Writing Assistant", | |
| "microsoft/prophetnet-large-uncased", | |
| ModelCategory.EDUCATIONAL_CONTENT, | |
| "Academic content creation with advanced text generation capabilities", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| # Lesson Planning & Curriculum Design - Structured & Professional | |
| HFModel( | |
| "Master Lesson Planner", | |
| "facebook/bart-large-cnn", | |
| ModelCategory.LESSON_PLANNING, | |
| "Comprehensive lesson planning with summarization and structure", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Curriculum Architect", | |
| "microsoft/prophetnet-base-uncased", | |
| ModelCategory.CURRICULUM_DESIGN, | |
| "Professional curriculum planning and educational program design", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Activity Designer", | |
| "google/t5-base", | |
| ModelCategory.LESSON_PLANNING, | |
| "Interactive learning activity and exercise generation", | |
| True, | |
| False, | |
| 512, | |
| True, | |
| ), | |
| # Subject-Specific Excellence - STEM Focus | |
| HFModel( | |
| "Programming Mentor Pro", | |
| "microsoft/codebert-base", | |
| ModelCategory.CODING_INSTRUCTION, | |
| "Expert programming education with code analysis and explanation", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Advanced Code Instructor", | |
| "microsoft/graphcodebert-base", | |
| ModelCategory.CODING_INSTRUCTION, | |
| "Advanced programming instruction with graph understanding", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Algorithm Tutor Elite", | |
| "microsoft/unixcoder-base", | |
| ModelCategory.CODING_INSTRUCTION, | |
| "Elite algorithm education and computational thinking development", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| # Science & Mathematics Excellence | |
| HFModel( | |
| "Science Research Educator", | |
| "allenai/scibert_scivocab_uncased", | |
| ModelCategory.SCIENCE_TUTORING, | |
| "Scientific education with research-grade knowledge and vocabulary", | |
| True, | |
| False, | |
| 512, | |
| False, | |
| ), | |
| HFModel( | |
| "Advanced Science AI", | |
| "facebook/galactica-125m", | |
| ModelCategory.SCIENCE_TUTORING, | |
| "Advanced scientific knowledge and research methodology education", | |
| True, | |
| False, | |
| 2048, | |
| True, | |
| ), | |
| HFModel( | |
| "Mathematical Reasoning Master", | |
| "google/flan-t5-xl", | |
| ModelCategory.MATH_TUTORING, | |
| "Advanced mathematical reasoning, proofs, and problem-solving", | |
| True, | |
| False, | |
| 2048, | |
| True, | |
| ), | |
| HFModel( | |
| "Interactive Math Tutor", | |
| "microsoft/DialoGPT-small", | |
| ModelCategory.MATH_TUTORING, | |
| "Interactive mathematics tutoring with step-by-step explanations", | |
| True, | |
| False, | |
| 1024, | |
| True, | |
| ), | |
| # Language & Literature Excellence | |
| HFModel( | |
| "Multilingual Language Master", | |
| "facebook/mbart-large-50-many-to-many-mmt", | |
| ModelCategory.LANGUAGE_TUTORING, | |
| "Advanced multilingual education and cross-language learning", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Literature & Language AI", | |
| "microsoft/prophetnet-large-uncased-cnndm", | |
| ModelCategory.LANGUAGE_TUTORING, | |
| "Literature analysis and advanced language instruction", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Grammar & Comprehension Expert", | |
| "google/electra-base-discriminator", | |
| ModelCategory.LANGUAGE_TUTORING, | |
| "Expert grammar instruction and reading comprehension development", | |
| True, | |
| False, | |
| 512, | |
| False, | |
| ), | |
| # Assessment & Evaluation Excellence | |
| HFModel( | |
| "Assessment Designer Pro", | |
| "microsoft/DialoGPT-large", | |
| ModelCategory.QUIZ_GENERATION, | |
| "Professional assessment and quiz generation with interaction", | |
| True, | |
| False, | |
| 2048, | |
| True, | |
| ), | |
| HFModel( | |
| "Learning Progress Analyzer", | |
| "facebook/bart-large", | |
| ModelCategory.LEARNING_ASSESSMENT, | |
| "Comprehensive learning assessment and progress tracking", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Question Master AI", | |
| "google/t5-base", | |
| ModelCategory.QUIZ_GENERATION, | |
| "Intelligent question generation for all educational levels", | |
| True, | |
| False, | |
| 512, | |
| True, | |
| ), | |
| HFModel( | |
| "Exam Preparation Specialist", | |
| "microsoft/unilm-base-cased", | |
| ModelCategory.EXAM_PREPARATION, | |
| "Specialized exam preparation and test strategy development", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| # Personalized & Adaptive Learning Excellence | |
| HFModel( | |
| "Personal Learning Architect", | |
| "microsoft/deberta-v3-base", | |
| ModelCategory.PERSONALIZED_LEARNING, | |
| "Advanced personalized learning path creation and optimization", | |
| True, | |
| False, | |
| 512, | |
| False, | |
| ), | |
| HFModel( | |
| "Adaptive Learning Engine", | |
| "facebook/opt-125m", | |
| ModelCategory.ADAPTIVE_LEARNING, | |
| "Intelligent adaptive learning with dynamic content adjustment", | |
| True, | |
| False, | |
| 2048, | |
| True, | |
| ), | |
| HFModel( | |
| "Learning Analytics Expert", | |
| "microsoft/layoutlm-base-uncased", | |
| ModelCategory.LEARNING_ANALYTICS, | |
| "Advanced learning analytics and educational data interpretation", | |
| True, | |
| False, | |
| 512, | |
| False, | |
| ), | |
| # Concept Explanation & Understanding Masters | |
| HFModel( | |
| "Concept Explanation Master", | |
| "microsoft/deberta-v3-base", | |
| ModelCategory.CONCEPT_EXPLANATION, | |
| "Master-level concept explanation and knowledge breakdown", | |
| True, | |
| False, | |
| 512, | |
| False, | |
| ), | |
| HFModel( | |
| "Knowledge Synthesizer", | |
| "google/pegasus-xsum", | |
| ModelCategory.CONCEPT_EXPLANATION, | |
| "Advanced knowledge synthesis and concept summarization", | |
| True, | |
| False, | |
| 512, | |
| False, | |
| ), | |
| HFModel( | |
| "Interactive Concept Guide", | |
| "facebook/bart-base", | |
| ModelCategory.CONCEPT_EXPLANATION, | |
| "Interactive concept teaching with clarification and examples", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| # Homework & Study Support Excellence | |
| HFModel( | |
| "Programming Homework Expert", | |
| "microsoft/codebert-base-mlm", | |
| ModelCategory.HOMEWORK_ASSISTANCE, | |
| "Expert programming homework assistance and debugging support", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Universal Homework Helper", | |
| "google/flan-t5-small", | |
| ModelCategory.HOMEWORK_ASSISTANCE, | |
| "Comprehensive homework assistance across all academic subjects", | |
| True, | |
| False, | |
| 1024, | |
| True, | |
| ), | |
| HFModel( | |
| "Global Study Assistant", | |
| "facebook/mbart-large-cc25", | |
| ModelCategory.HOMEWORK_ASSISTANCE, | |
| "Multilingual homework support with cultural context understanding", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| # Study Materials & Resources Excellence | |
| HFModel( | |
| "Study Guide Architect", | |
| "microsoft/prophetnet-large-uncased", | |
| ModelCategory.STUDY_GUIDE_CREATION, | |
| "Professional study guide creation and learning material development", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Educational Resource Creator", | |
| "facebook/bart-large-xsum", | |
| ModelCategory.STUDY_GUIDE_CREATION, | |
| "Comprehensive educational resource and reference material creation", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| # Interactive Learning & Gamification | |
| HFModel( | |
| "Educational Game Designer", | |
| "microsoft/DialoGPT-base", | |
| ModelCategory.EDUCATIONAL_GAMES, | |
| "Interactive educational games and gamified learning experiences", | |
| True, | |
| False, | |
| 1024, | |
| True, | |
| ), | |
| HFModel( | |
| "Learning Game Engine", | |
| "google/bert-base-uncased", | |
| ModelCategory.EDUCATIONAL_GAMES, | |
| "Educational game mechanics and interactive learning systems", | |
| True, | |
| False, | |
| 512, | |
| False, | |
| ), | |
| # History & Social Studies Excellence | |
| HFModel( | |
| "History Professor AI", | |
| "microsoft/deberta-large", | |
| ModelCategory.HISTORY_TUTORING, | |
| "Professor-level historical analysis and social studies education", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Interactive History Guide", | |
| "facebook/opt-350m", | |
| ModelCategory.HISTORY_TUTORING, | |
| "Interactive historical narratives and timeline exploration", | |
| True, | |
| False, | |
| 2048, | |
| True, | |
| ), | |
| # Multi-Subject Teaching Excellence | |
| HFModel( | |
| "Master Subject Teacher", | |
| "google/flan-t5-base", | |
| ModelCategory.SUBJECT_TEACHING, | |
| "Expert multi-subject teaching with instruction-following excellence", | |
| True, | |
| False, | |
| 1024, | |
| True, | |
| ), | |
| HFModel( | |
| "Universal Educator AI", | |
| "microsoft/unilm-large-cased", | |
| ModelCategory.SUBJECT_TEACHING, | |
| "Universal education AI with cross-disciplinary knowledge", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| # Advanced Analytics & Optimization | |
| HFModel( | |
| "Advanced Learning Analytics", | |
| "microsoft/layoutlm-large-uncased", | |
| ModelCategory.LEARNING_ANALYTICS, | |
| "Enterprise-level learning analytics and educational insights", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| HFModel( | |
| "Personalization Engine Pro", | |
| "google/electra-large-discriminator", | |
| ModelCategory.PERSONALIZED_LEARNING, | |
| "Advanced AI personalization with learning style adaptation", | |
| True, | |
| False, | |
| 512, | |
| False, | |
| ), | |
| HFModel( | |
| "Global Adaptive System", | |
| "facebook/mbart-large-50", | |
| ModelCategory.ADAPTIVE_LEARNING, | |
| "Global adaptive learning system with multilingual capabilities", | |
| True, | |
| False, | |
| 1024, | |
| False, | |
| ), | |
| ] | |
# Qwen Models - Advanced Reasoning and Multimodal AI
#
# Each row lists the HFModel positional arguments that vary between entries:
# (display name, Hub repo id, category, description, integer limit, boolean flag).
# The fifth and sixth positional arguments are True and False for every entry,
# so the comprehension supplies them once.
# NOTE(review): HFModel's field names are declared outside this section; the
# row labels above are inferred from the values - confirm against the dataclass.
QWEN_MODELS = [
    HFModel(name, repo_id, category, description, True, False, limit, flag)
    for name, repo_id, category, description, limit, flag in [
        # Qwen2.5 Series - Latest Models
        (
            "Qwen2.5-72B-Instruct",
            "Qwen/Qwen2.5-72B-Instruct",
            ModelCategory.TEXT_GENERATION,
            "Large-scale instruction-following model for complex reasoning",
            32768,
            True,
        ),
        (
            "Qwen2.5-32B-Instruct",
            "Qwen/Qwen2.5-32B-Instruct",
            ModelCategory.TEXT_GENERATION,
            "High-performance instruction model for advanced tasks",
            32768,
            True,
        ),
        (
            "Qwen2.5-14B-Instruct",
            "Qwen/Qwen2.5-14B-Instruct",
            ModelCategory.TEXT_GENERATION,
            "Efficient large model with excellent reasoning capabilities",
            32768,
            True,
        ),
        (
            "Qwen2.5-7B-Instruct",
            "Qwen/Qwen2.5-7B-Instruct",
            ModelCategory.TEXT_GENERATION,
            "Optimized 7B model for general-purpose applications",
            32768,
            True,
        ),
        (
            "Qwen2.5-3B-Instruct",
            "Qwen/Qwen2.5-3B-Instruct",
            ModelCategory.TEXT_GENERATION,
            "Lightweight model for resource-constrained environments",
            32768,
            True,
        ),
        (
            "Qwen2.5-1.5B-Instruct",
            "Qwen/Qwen2.5-1.5B-Instruct",
            ModelCategory.TEXT_GENERATION,
            "Ultra-lightweight model for edge deployment",
            32768,
            True,
        ),
        (
            "Qwen2.5-0.5B-Instruct",
            "Qwen/Qwen2.5-0.5B-Instruct",
            ModelCategory.TEXT_GENERATION,
            "Minimal footprint model for basic applications",
            32768,
            True,
        ),
        # Qwen2.5-Coder Series - Programming Specialists
        (
            "Qwen2.5-Coder-32B-Instruct",
            "Qwen/Qwen2.5-Coder-32B-Instruct",
            ModelCategory.QWEN_CODE,
            "Advanced code generation and programming assistance",
            131072,
            True,
        ),
        (
            "Qwen2.5-Coder-14B-Instruct",
            "Qwen/Qwen2.5-Coder-14B-Instruct",
            ModelCategory.QWEN_CODE,
            "Code generation with excellent debugging capabilities",
            131072,
            True,
        ),
        (
            "Qwen2.5-Coder-7B-Instruct",
            "Qwen/Qwen2.5-Coder-7B-Instruct",
            ModelCategory.QWEN_CODE,
            "Efficient coding assistant for multiple languages",
            131072,
            True,
        ),
        (
            "Qwen2.5-Coder-3B-Instruct",
            "Qwen/Qwen2.5-Coder-3B-Instruct",
            ModelCategory.QWEN_CODE,
            "Lightweight programming assistant",
            131072,
            True,
        ),
        (
            "Qwen2.5-Coder-1.5B-Instruct",
            "Qwen/Qwen2.5-Coder-1.5B-Instruct",
            ModelCategory.QWEN_CODE,
            "Compact code generation model",
            131072,
            True,
        ),
        # Qwen2.5-Math Series - Mathematical Reasoning
        (
            "Qwen2.5-Math-72B-Instruct",
            "Qwen/Qwen2.5-Math-72B-Instruct",
            ModelCategory.QWEN_MATH,
            "Advanced mathematical problem solving and reasoning",
            32768,
            True,
        ),
        (
            "Qwen2.5-Math-7B-Instruct",
            "Qwen/Qwen2.5-Math-7B-Instruct",
            ModelCategory.QWEN_MATH,
            "Mathematical reasoning and calculation assistance",
            32768,
            True,
        ),
        (
            "Qwen2.5-Math-1.5B-Instruct",
            "Qwen/Qwen2.5-Math-1.5B-Instruct",
            ModelCategory.QWEN_MATH,
            "Compact mathematical problem solver",
            32768,
            True,
        ),
        # QwQ Series - Reasoning Specialists
        (
            "QwQ-32B-Preview",
            "Qwen/QwQ-32B-Preview",
            ModelCategory.QWEN_REASONING,
            "Advanced reasoning and logical thinking model",
            32768,
            True,
        ),
        # Qwen2-VL Series - Vision-Language Models
        (
            "Qwen2-VL-72B-Instruct",
            "Qwen/Qwen2-VL-72B-Instruct",
            ModelCategory.QWEN_VISION,
            "Large-scale vision-language understanding and generation",
            32768,
            True,
        ),
        (
            "Qwen2-VL-7B-Instruct",
            "Qwen/Qwen2-VL-7B-Instruct",
            ModelCategory.QWEN_VISION,
            "Efficient vision-language model for multimodal tasks",
            32768,
            True,
        ),
        (
            "Qwen2-VL-2B-Instruct",
            "Qwen/Qwen2-VL-2B-Instruct",
            ModelCategory.QWEN_VISION,
            "Lightweight vision-language model",
            32768,
            True,
        ),
        # Qwen2-Audio Series - Audio Understanding
        (
            "Qwen2-Audio-7B-Instruct",
            "Qwen/Qwen2-Audio-7B-Instruct",
            ModelCategory.QWEN_AUDIO,
            "Advanced audio understanding and generation",
            32768,
            True,
        ),
        # Qwen Legacy Models - Still Powerful
        (
            "Qwen1.5-110B-Chat",
            "Qwen/Qwen1.5-110B-Chat",
            ModelCategory.CONVERSATIONAL,
            "Large conversational model with broad knowledge",
            32768,
            True,
        ),
        (
            "Qwen1.5-72B-Chat",
            "Qwen/Qwen1.5-72B-Chat",
            ModelCategory.CONVERSATIONAL,
            "Conversational AI with excellent reasoning",
            32768,
            True,
        ),
        (
            "Qwen1.5-32B-Chat",
            "Qwen/Qwen1.5-32B-Chat",
            ModelCategory.CONVERSATIONAL,
            "Efficient chat model for interactive applications",
            32768,
            True,
        ),
        (
            "Qwen1.5-14B-Chat",
            "Qwen/Qwen1.5-14B-Chat",
            ModelCategory.CONVERSATIONAL,
            "Balanced performance chat model",
            32768,
            True,
        ),
        (
            "Qwen1.5-7B-Chat",
            "Qwen/Qwen1.5-7B-Chat",
            ModelCategory.CONVERSATIONAL,
            "Popular chat model with good performance",
            32768,
            True,
        ),
        (
            "Qwen1.5-4B-Chat",
            "Qwen/Qwen1.5-4B-Chat",
            ModelCategory.CONVERSATIONAL,
            "Lightweight conversational AI",
            32768,
            True,
        ),
    ]
]
# DeepSeek Models - Coding and Reasoning Excellence
#
# Each row lists the HFModel positional arguments that vary between entries:
# (display name, Hub repo id, category, description, integer limit, boolean flag).
# The fifth and sixth positional arguments are True and False for every entry,
# so the comprehension supplies them once.
# NOTE(review): HFModel's field names are declared outside this section; the
# row labels above are inferred from the values - confirm against the dataclass.
DEEPSEEK_MODELS = [
    HFModel(name, repo_id, category, description, True, False, limit, flag)
    for name, repo_id, category, description, limit, flag in [
        # DeepSeek-V3 Series - Latest Generation
        (
            "DeepSeek-V3",
            "deepseek-ai/DeepSeek-V3",
            ModelCategory.DEEPSEEK_REASONING,
            "Latest generation reasoning and knowledge model",
            65536,
            True,
        ),
        (
            "DeepSeek-V3-Base",
            "deepseek-ai/DeepSeek-V3-Base",
            ModelCategory.TEXT_GENERATION,
            "Foundation model for various downstream tasks",
            65536,
            True,
        ),
        # DeepSeek-V2.5 Series
        (
            "DeepSeek-V2.5",
            "deepseek-ai/DeepSeek-V2.5",
            ModelCategory.DEEPSEEK_REASONING,
            "Advanced reasoning and general intelligence model",
            32768,
            True,
        ),
        # DeepSeek-Coder Series - Programming Specialists
        (
            "DeepSeek-Coder-V2-Instruct",
            "deepseek-ai/DeepSeek-Coder-V2-Instruct",
            ModelCategory.DEEPSEEK_CODING,
            "Advanced code generation and programming assistance",
            163840,
            True,
        ),
        (
            "DeepSeek-Coder-V2-Base",
            "deepseek-ai/DeepSeek-Coder-V2-Base",
            ModelCategory.DEEPSEEK_CODING,
            "Foundation coding model for fine-tuning",
            163840,
            True,
        ),
        (
            "DeepSeek-Coder-33B-Instruct",
            "deepseek-ai/deepseek-coder-33b-instruct",
            ModelCategory.DEEPSEEK_CODING,
            "Large-scale code generation and debugging",
            16384,
            True,
        ),
        (
            "DeepSeek-Coder-6.7B-Instruct",
            "deepseek-ai/deepseek-coder-6.7b-instruct",
            ModelCategory.DEEPSEEK_CODING,
            "Efficient code assistance and generation",
            16384,
            True,
        ),
        (
            "DeepSeek-Coder-1.3B-Instruct",
            "deepseek-ai/deepseek-coder-1.3b-instruct",
            ModelCategory.DEEPSEEK_CODING,
            "Lightweight coding assistant",
            16384,
            True,
        ),
        # DeepSeek-Math Series - Mathematical Reasoning
        (
            "DeepSeek-Math-7B-Instruct",
            "deepseek-ai/deepseek-math-7b-instruct",
            ModelCategory.DEEPSEEK_MATH,
            "Mathematical problem solving and reasoning",
            4096,
            True,
        ),
        (
            "DeepSeek-Math-7B-Base",
            "deepseek-ai/deepseek-math-7b-base",
            ModelCategory.DEEPSEEK_MATH,
            "Foundation model for mathematical reasoning",
            4096,
            True,
        ),
        # DeepSeek Chat Models
        (
            "DeepSeek-67B-Chat",
            "deepseek-ai/deepseek-llm-67b-chat",
            ModelCategory.CONVERSATIONAL,
            "Large conversational model with strong reasoning",
            4096,
            True,
        ),
        (
            "DeepSeek-7B-Chat",
            "deepseek-ai/deepseek-llm-7b-chat",
            ModelCategory.CONVERSATIONAL,
            "Efficient chat model for general conversations",
            4096,
            True,
        ),
        # DeepSeek-VL Series - Vision-Language
        (
            "DeepSeek-VL-7B-Chat",
            "deepseek-ai/deepseek-vl-7b-chat",
            ModelCategory.VISION_LANGUAGE,
            "Vision-language understanding and conversation",
            4096,
            True,
        ),
        (
            "DeepSeek-VL-1.3B-Chat",
            "deepseek-ai/deepseek-vl-1.3b-chat",
            ModelCategory.VISION_LANGUAGE,
            "Lightweight vision-language model",
            4096,
            True,
        ),
    ]
]
# Advanced Image Editing Models
#
# Each row lists the HFModel positional arguments that vary between entries:
# (display name, Hub repo id, category, description, integer limit).
# The fifth, sixth and eighth positional arguments are True, False and False
# for every entry, so the comprehension supplies them once.
# NOTE(review): HFModel's field names are declared outside this section; the
# row labels above are inferred from the values - confirm against the dataclass.
IMAGE_EDITING_MODELS = [
    HFModel(name, repo_id, category, description, True, False, limit, False)
    for name, repo_id, category, description, limit in [
        # Professional Image Editing
        (
            "SDXL Inpainting",
            "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
            ModelCategory.IMAGE_EDITING,
            "High-quality image inpainting and editing",
            1024,
        ),
        (
            "ControlNet Inpainting",
            "lllyasviel/control_v11p_sd15_inpaint",
            ModelCategory.IMAGE_EDITING,
            "Controllable image inpainting with precise editing",
            512,
        ),
        (
            "InstantID Face Editor",
            "InstantX/InstantID",
            ModelCategory.FACE_ENHANCEMENT,
            "Identity-preserving face editing and enhancement",
            512,
        ),
        (
            "Real-ESRGAN Upscaler",
            "ai-forever/Real-ESRGAN",
            ModelCategory.IMAGE_UPSCALING,
            "Advanced image super-resolution and enhancement",
            1024,
        ),
        (
            "GFPGAN Face Restoration",
            "Xintao/GFPGAN",
            ModelCategory.FACE_RESTORATION,
            "High-quality face restoration and enhancement",
            512,
        ),
        (
            "CodeFormer Face Restoration",
            "sczhou/CodeFormer",
            ModelCategory.FACE_RESTORATION,
            "Robust face restoration for low-quality images",
            512,
        ),
        (
            "Background Removal",
            "briaai/RMBG-1.4",
            ModelCategory.BACKGROUND_REMOVAL,
            "Precise background removal and segmentation",
            1024,
        ),
        (
            "U2-Net Background Removal",
            "simonw/u2net-portrait-segmentation",
            ModelCategory.BACKGROUND_REMOVAL,
            "Portrait and object background removal",
            320,
        ),
        (
            "Photo Colorization",
            "microsoft/beit-base-patch16-224-pt22k-ft22k",
            ModelCategory.COLOR_CORRECTION,
            "AI-powered photo colorization and enhancement",
            224,
        ),
        (
            "Style Transfer Neural",
            "pytorch/vision",
            ModelCategory.ARTISTIC_FILTER,
            "Neural style transfer for artistic image effects",
            512,
        ),
    ]
]
# Face Swap and Manipulation Models
#
# Each row lists the HFModel positional arguments that vary between entries:
# (display name, Hub repo id, category, description, integer limit).
# The fifth, sixth and eighth positional arguments are True, False and False
# for every entry, so the comprehension supplies them once.
# NOTE(review): HFModel's field names are declared outside this section; the
# row labels above are inferred from the values - confirm against the dataclass.
FACE_SWAP_MODELS = [
    HFModel(name, repo_id, category, description, True, False, limit, False)
    for name, repo_id, category, description, limit in [
        # Advanced Face Swapping
        (
            "InsightFace SwapFace",
            "deepinsight/inswapper_128.onnx",
            ModelCategory.FACE_SWAP,
            "High-quality face swapping with identity preservation",
            128,
        ),
        (
            "SimSwap Face Swap",
            "ppogg/simswap_official",
            ModelCategory.FACE_SWAP,
            "Realistic face swapping for videos and images",
            224,
        ),
        (
            "FaceX-Zoo Face Swap",
            "FacePerceiver/FaceX-Zoo",
            ModelCategory.FACE_SWAP,
            "Multi-purpose face analysis and swapping toolkit",
            112,
        ),
        (
            "Face Enhancement Pro",
            "TencentARC/GFPGAN",
            ModelCategory.FACE_ENHANCEMENT,
            "Professional face enhancement and restoration",
            512,
        ),
        (
            "DualStyleGAN Face Edit",
            "williamyang1991/DualStyleGAN",
            ModelCategory.FACE_ENHANCEMENT,
            "Style-controllable face image editing",
            1024,
        ),
        (
            "MegaPortraits Face Animate",
            "NVlabs/MegaPortraits",
            ModelCategory.FACIAL_ANIMATION,
            "One-shot facial animation and expression transfer",
            256,
        ),
    ]
]
# Advanced TTS and STT Models
#
# Each row lists the HFModel positional arguments that vary between entries:
# (display name, Hub repo id, category, description, integer value, boolean flag).
# The fifth and sixth positional arguments are True and False for every entry,
# so the comprehension supplies them once.
# NOTE(review): HFModel's field names are declared outside this section, and the
# integer column mixes magnitudes here (30 up to 32000) - confirm what unit the
# dataclass expects before relying on it.
ADVANCED_SPEECH_MODELS = [
    HFModel(name, repo_id, category, description, True, False, limit, flag)
    for name, repo_id, category, description, limit, flag in [
        # Multilingual Text-to-Speech
        (
            "XTTS v2 Multilingual",
            "coqui/XTTS-v2",
            ModelCategory.MULTILINGUAL_TTS,
            "High-quality multilingual text-to-speech with voice cloning",
            24000,
            True,
        ),
        (
            "Bark Text-to-Speech",
            "suno/bark",
            ModelCategory.ADVANCED_TTS,
            "Generative TTS with music, sound effects, and multiple speakers",
            24000,
            False,
        ),
        (
            "SpeechT5 TTS",
            "microsoft/speecht5_tts",
            ModelCategory.ADVANCED_TTS,
            "High-quality neural text-to-speech synthesis",
            16000,
            False,
        ),
        (
            "VALL-E X Multilingual",
            "Plachtaa/VALL-E-X",
            ModelCategory.MULTILINGUAL_TTS,
            "Zero-shot voice synthesis in multiple languages",
            24000,
            False,
        ),
        (
            "Arabic TTS",
            "arabic-speech-corpus/tts-arabic",
            ModelCategory.MULTILINGUAL_TTS,
            "High-quality Arabic text-to-speech synthesis",
            22050,
            False,
        ),
        (
            "Tortoise TTS",
            "jbetker/tortoise-tts",
            ModelCategory.VOICE_CLONING,
            "High-quality voice cloning and synthesis",
            22050,
            False,
        ),
        # Advanced Speech-to-Text
        (
            "Whisper Large v3",
            "openai/whisper-large-v3",
            ModelCategory.MULTILINGUAL_STT,
            "State-of-the-art multilingual speech recognition",
            30,
            False,
        ),
        (
            "Whisper Large v3 Turbo",
            "openai/whisper-large-v3-turbo",
            ModelCategory.MULTILINGUAL_STT,
            "Fast multilingual speech recognition with high accuracy",
            30,
            True,
        ),
        (
            "Arabic Whisper",
            "arabic-speech-corpus/whisper-large-arabic",
            ModelCategory.MULTILINGUAL_STT,
            "Optimized Arabic speech recognition model",
            30,
            False,
        ),
        (
            "MMS Speech Recognition",
            "facebook/mms-1b-all",
            ModelCategory.MULTILINGUAL_STT,
            "Massively multilingual speech recognition (1000+ languages)",
            16000,
            False,
        ),
        (
            "Wav2Vec2 Arabic",
            "facebook/wav2vec2-large-xlsr-53-arabic",
            ModelCategory.MULTILINGUAL_STT,
            "Arabic speech recognition with Wav2Vec2 architecture",
            16000,
            False,
        ),
        (
            "SpeechT5 ASR",
            "microsoft/speecht5_asr",
            ModelCategory.ADVANCED_STT,
            "Advanced automatic speech recognition",
            16000,
            False,
        ),
        # Real-time Translation and Voice Conversion
        (
            "SeamlessM4T",
            "facebook/seamless-m4t-v2-large",
            ModelCategory.REAL_TIME_TRANSLATION,
            "Multilingual speech-to-speech translation",
            16000,
            True,
        ),
        (
            "Voice Conversion VITS",
            "jaywalnut310/vits-ljs",
            ModelCategory.VOICE_CONVERSION,
            "High-quality voice conversion and synthesis",
            22050,
            False,
        ),
        (
            "RVC Voice Clone",
            "lj1995/GPT-SoVITS",
            ModelCategory.VOICE_CLONING,
            "Real-time voice cloning and conversion",
            32000,
            True,
        ),
    ]
]
# Talking Avatar and Video Generation Models
#
# Each row lists the HFModel positional arguments that vary between entries:
# (display name, Hub repo id, category, description, integer limit, boolean flag).
# The fifth and sixth positional arguments are True and False for every entry,
# so the comprehension supplies them once.
# NOTE(review): HFModel's field names are declared outside this section; the
# row labels above are inferred from the values - confirm against the dataclass.
TALKING_AVATAR_MODELS = [
    HFModel(name, repo_id, category, description, True, False, limit, flag)
    for name, repo_id, category, description, limit, flag in [
        # Talking Head Generation
        (
            "SadTalker Talking Head",
            "vinthony/SadTalker",
            ModelCategory.TALKING_AVATAR,
            "Generate talking head videos from audio and single image",
            256,
            False,
        ),
        (
            "Real-Time Face Animation",
            "PaddlePaddle/PaddleGAN-FOM",
            ModelCategory.FACIAL_ANIMATION,
            "Real-time facial animation and expression control",
            256,
            True,
        ),
        (
            "LivePortrait Animation",
            "KwaiVGI/LivePortrait",
            ModelCategory.TALKING_AVATAR,
            "High-quality portrait animation with lip sync",
            512,
            False,
        ),
        (
            "DualTalker Video",
            "OpenTalker/DualTalker",
            ModelCategory.TALKING_AVATAR,
            "Dual-modal talking face generation with enhanced quality",
            256,
            False,
        ),
        (
            "Video Retalking",
            "vinthony/video-retalking",
            ModelCategory.LIP_SYNC,
            "Audio-driven lip sync for existing videos",
            224,
            False,
        ),
        (
            "Wav2Lip Lip Sync",
            "Rudrabha/Wav2Lip",
            ModelCategory.LIP_SYNC,
            "Accurate lip sync generation from audio",
            96,
            False,
        ),
        (
            "Digital Human Avatar",
            "modelscope/damo-text-to-video-synthesis",
            ModelCategory.VIRTUAL_PRESENTER,
            "Generate digital human presenter videos",
            320,
            False,
        ),
        (
            "AI News Anchor",
            "microsoft/DiT-XL-2-256",
            ModelCategory.AI_ANCHOR,
            "Professional AI news anchor and presenter generation",
            256,
            False,
        ),
        (
            "Avatar Gesture Control",
            "ZhengPeng7/BiSeNet",
            ModelCategory.GESTURE_GENERATION,
            "Generate natural gestures and body language for avatars",
            512,
            False,
        ),
    ]
]
# Interactive Language Models (English-Arabic Focus)
#
# Each row lists the HFModel positional arguments that vary between entries:
# (display name, Hub repo id, category, description, integer limit, boolean flag).
# The fifth and sixth positional arguments are True and False for every entry,
# so the comprehension supplies them once.
# NOTE(review): HFModel's field names are declared outside this section; the
# row labels above are inferred from the values - confirm against the dataclass.
INTERACTIVE_LANGUAGE_MODELS = [
    HFModel(name, repo_id, category, description, True, False, limit, flag)
    for name, repo_id, category, description, limit, flag in [
        # Bilingual Conversation Models
        (
            "AceGPT Arabic-English",
            "FreedomIntelligence/AceGPT-13B",
            ModelCategory.BILINGUAL_CONVERSATION,
            "Bilingual Arabic-English conversation model",
            4096,
            True,
        ),
        (
            "Jais Arabic Chat",
            "core42/jais-13b-chat",
            ModelCategory.BILINGUAL_CONVERSATION,
            "Advanced Arabic conversation model with English support",
            2048,
            True,
        ),
        (
            "AraBART Conversational",
            "aubmindlab/arabart-base-conversational",
            ModelCategory.BILINGUAL_CONVERSATION,
            "Arabic conversational AI with cultural understanding",
            1024,
            True,
        ),
        (
            "Multilingual Chat Assistant",
            "microsoft/DialoGPT-large",
            ModelCategory.INTERACTIVE_CHAT,
            "Interactive chat assistant supporting multiple languages",
            1024,
            True,
        ),
        (
            "Cultural Context Chat",
            "bigscience/bloom-7b1",
            ModelCategory.CULTURAL_ADAPTATION,
            "Culturally aware conversation model for diverse contexts",
            2048,
            True,
        ),
        (
            "Context-Aware Assistant",
            "microsoft/GODEL-v1_1-large-seq2seq",
            ModelCategory.CONTEXT_AWARE_CHAT,
            "Context-aware conversational AI with memory",
            1024,
            True,
        ),
        (
            "Personality Chat Bot",
            "microsoft/PersonaGPT",
            ModelCategory.PERSONALITY_CHAT,
            "Personality-driven conversational AI with distinct characters",
            1024,
            True,
        ),
        (
            "Role-Play Assistant",
            "PygmalionAI/pygmalion-6b",
            ModelCategory.ROLE_PLAY_CHAT,
            "Interactive role-playing conversation model",
            2048,
            True,
        ),
        (
            "Domain Expert Chat",
            "microsoft/DialoGPT-medium",
            ModelCategory.DOMAIN_SPECIFIC_CHAT,
            "Specialized domain conversation assistant",
            1024,
            True,
        ),
        # Arabic Language Specialists
        (
            "Arabic GPT-J",
            "aubmindlab/aragpt2-base",
            ModelCategory.BILINGUAL_CONVERSATION,
            "Arabic language generation and conversation",
            1024,
            True,
        ),
        (
            "Marbert Arabic Chat",
            "UBC-NLP/MARBERT",
            ModelCategory.BILINGUAL_CONVERSATION,
            "Dialectal Arabic conversation model",
            512,
            False,
        ),
        (
            "ArabicBERT Chat",
            "aubmindlab/bert-base-arabertv2",
            ModelCategory.BILINGUAL_CONVERSATION,
            "Modern Standard Arabic conversational understanding",
            512,
            False,
        ),
    ]
]
| class HuggingFaceInference: | |
| """Hugging Face Inference API integration""" | |
def __init__(
    self,
    api_token: str,
    base_url: str = "https://api-inference.huggingface.co/models/",
):
    """Record the credentials and endpoint; no network activity happens here.

    Args:
        api_token: Token sent as a ``Bearer`` authorization header once the
            session is opened in ``__aenter__``.
        base_url: Endpoint prefix; defaults to the hosted Inference API
            ``/models/`` root.
    """
    # The HTTP session does not exist until the client is entered with
    # ``async with``; None marks the not-yet-opened state.
    self.session = None
    self.base_url = base_url
    self.api_token = api_token
async def __aenter__(self):
    """Open the HTTP session and return this client for ``async with`` use."""
    auth_headers = {"Authorization": f"Bearer {self.api_token}"}
    # 5 minutes timeout
    request_timeout = aiohttp.ClientTimeout(total=300)
    self.session = aiohttp.ClientSession(
        headers=auth_headers,
        timeout=request_timeout,
    )
    return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Close the HTTP session, if one was opened, when leaving ``async with``.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the ``async with`` body keeps propagating.
    """
    open_session = self.session
    if not open_session:
        return
    await open_session.close()
async def text_generation(
    self,
    model_id: str,
    prompt: str,
    max_tokens: int = 100,
    temperature: float = 0.7,
    stream: bool = False,
    **kwargs,
) -> Dict[str, Any]:
    """Generate text using a text generation model.

    Args:
        model_id: Identifier of the model to query.
        prompt: Input text sent as ``inputs``.
        max_tokens: Sent as the ``max_new_tokens`` parameter.
        temperature: Sampling temperature. A value of zero or below selects
            greedy decoding (``do_sample`` is False and no temperature is
            sent), because sampling with a non-positive temperature is
            rejected by generation backends.
        stream: When true the payload goes through ``self._stream_request``
            instead of ``self._request``.
        **kwargs: Extra generation parameters; they are applied last and so
            override the values derived from the named arguments.

    Returns:
        Whatever the selected request helper returns.
    """
    # None is passed through untouched so existing callers keep their payload.
    use_sampling = temperature is None or temperature > 0

    parameters: Dict[str, Any] = {"max_new_tokens": max_tokens}
    if use_sampling:
        parameters["temperature"] = temperature
    parameters["do_sample"] = use_sampling
    parameters.update(kwargs)

    payload = {
        "inputs": prompt,
        "parameters": parameters,
        "options": {"use_cache": False},
    }
    if stream:
        return await self._stream_request(model_id, payload)
    return await self._request(model_id, payload)
| async def text_to_image( | |
| self, | |
| model_id: str, | |
| prompt: str, | |
| negative_prompt: Optional[str] = None, | |
| **kwargs, | |
| ) -> bytes: | |
| """Generate image from text prompt""" | |
| payload = { | |
| "inputs": prompt, | |
| "parameters": { | |
| **({"negative_prompt": negative_prompt} if negative_prompt else {}), | |
| **kwargs, | |
| }, | |
| } | |
| response = await self._request(model_id, payload, expect_json=False) | |
| return response | |
| async def automatic_speech_recognition( | |
| self, model_id: str, audio_data: bytes, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Transcribe audio to text""" | |
| # Convert audio bytes to base64 for API | |
| audio_b64 = base64.b64encode(audio_data).decode() | |
| payload = {"inputs": audio_b64, "parameters": kwargs} | |
| return await self._request(model_id, payload) | |
| async def text_to_speech(self, model_id: str, text: str, **kwargs) -> bytes: | |
| """Convert text to speech audio""" | |
| payload = {"inputs": text, "parameters": kwargs} | |
| response = await self._request(model_id, payload, expect_json=False) | |
| return response | |
| async def image_classification( | |
| self, model_id: str, image_data: bytes, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Classify images""" | |
| # Convert image to base64 | |
| image_b64 = base64.b64encode(image_data).decode() | |
| payload = {"inputs": image_b64, "parameters": kwargs} | |
| return await self._request(model_id, payload) | |
| async def feature_extraction( | |
| self, model_id: str, texts: Union[str, List[str]], **kwargs | |
| ) -> Dict[str, Any]: | |
| """Extract embeddings from text""" | |
| payload = {"inputs": texts, "parameters": kwargs} | |
| return await self._request(model_id, payload) | |
| async def translation( | |
| self, | |
| model_id: str, | |
| text: str, | |
| src_lang: Optional[str] = None, | |
| tgt_lang: Optional[str] = None, | |
| **kwargs, | |
| ) -> Dict[str, Any]: | |
| """Translate text between languages""" | |
| payload = { | |
| "inputs": text, | |
| "parameters": { | |
| **({"src_lang": src_lang} if src_lang else {}), | |
| **({"tgt_lang": tgt_lang} if tgt_lang else {}), | |
| **kwargs, | |
| }, | |
| } | |
| return await self._request(model_id, payload) | |
| async def summarization( | |
| self, | |
| model_id: str, | |
| text: str, | |
| max_length: int = 150, | |
| min_length: int = 30, | |
| **kwargs, | |
| ) -> Dict[str, Any]: | |
| """Summarize text""" | |
| payload = { | |
| "inputs": text, | |
| "parameters": { | |
| "max_length": max_length, | |
| "min_length": min_length, | |
| **kwargs, | |
| }, | |
| } | |
| return await self._request(model_id, payload) | |
| async def question_answering( | |
| self, model_id: str, question: str, context: str, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Answer questions based on context""" | |
| payload = { | |
| "inputs": {"question": question, "context": context}, | |
| "parameters": kwargs, | |
| } | |
| return await self._request(model_id, payload) | |
| async def zero_shot_classification( | |
| self, model_id: str, text: str, candidate_labels: List[str], **kwargs | |
| ) -> Dict[str, Any]: | |
| """Classify text without training data""" | |
| payload = { | |
| "inputs": text, | |
| "parameters": {"candidate_labels": candidate_labels, **kwargs}, | |
| } | |
| return await self._request(model_id, payload) | |
| async def conversational( | |
| self, | |
| model_id: str, | |
| text: str, | |
| conversation_history: Optional[List[Dict[str, str]]] = None, | |
| **kwargs, | |
| ) -> Dict[str, Any]: | |
| """Have a conversation with a model""" | |
| payload = { | |
| "inputs": { | |
| "text": text, | |
| **( | |
| { | |
| "past_user_inputs": [ | |
| h["user"] for h in conversation_history if "user" in h | |
| ] | |
| } | |
| if conversation_history | |
| else {} | |
| ), | |
| **( | |
| { | |
| "generated_responses": [ | |
| h["bot"] for h in conversation_history if "bot" in h | |
| ] | |
| } | |
| if conversation_history | |
| else {} | |
| ), | |
| }, | |
| "parameters": kwargs, | |
| } | |
| return await self._request(model_id, payload) | |
| async def _request( | |
| self, model_id: str, payload: Dict[str, Any], expect_json: bool = True | |
| ) -> Union[Dict[str, Any], bytes]: | |
| """Make HTTP request to Hugging Face API""" | |
| url = f"{self.base_url}{model_id}" | |
| try: | |
| async with self.session.post(url, json=payload) as response: | |
| if response.status == 200: | |
| if expect_json: | |
| return await response.json() | |
| else: | |
| return await response.read() | |
| elif response.status == 503: | |
| # Model is loading, wait and retry | |
| error_info = await response.json() | |
| estimated_time = error_info.get("estimated_time", 30) | |
| logger.info( | |
| f"Model {model_id} is loading, waiting {estimated_time}s" | |
| ) | |
| await asyncio.sleep(min(estimated_time, 60)) # Cap at 60 seconds | |
| return await self._request(model_id, payload, expect_json) | |
| else: | |
| error_text = await response.text() | |
| raise Exception( | |
| f"API request failed with status {response.status}: {error_text}" | |
| ) | |
| except Exception as e: | |
| logger.error(f"Error calling Hugging Face API for {model_id}: {e}") | |
| raise | |
| async def _stream_request(self, model_id: str, payload: Dict[str, Any]): | |
| """Stream response from Hugging Face API""" | |
| url = f"{self.base_url}{model_id}" | |
| payload["stream"] = True | |
| try: | |
| async with self.session.post(url, json=payload) as response: | |
| if response.status == 200: | |
| async for chunk in response.content: | |
| if chunk: | |
| yield chunk.decode("utf-8") | |
| else: | |
| error_text = await response.text() | |
| raise Exception( | |
| f"Streaming request failed with status {response.status}: {error_text}" | |
| ) | |
| except Exception as e: | |
| logger.error(f"Error streaming from Hugging Face API for {model_id}: {e}") | |
| raise | |
| # New methods for expanded model categories | |
| async def text_to_video( | |
| self, model_id: str, prompt: str, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Generate video from text prompt""" | |
| payload = { | |
| "inputs": prompt, | |
| "parameters": { | |
| "duration": kwargs.get("duration", 5), | |
| "fps": kwargs.get("fps", 24), | |
| "width": kwargs.get("width", 512), | |
| "height": kwargs.get("height", 512), | |
| **kwargs, | |
| }, | |
| } | |
| return await self._request(model_id, payload) | |
| async def video_to_text( | |
| self, model_id: str, video_data: bytes, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Analyze video and generate text description""" | |
| video_b64 = base64.b64encode(video_data).decode() | |
| payload = { | |
| "inputs": {"video": video_b64}, | |
| "parameters": kwargs, | |
| } | |
| return await self._request(model_id, payload) | |
| async def code_generation( | |
| self, model_id: str, prompt: str, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Generate code from natural language prompt""" | |
| payload = { | |
| "inputs": prompt, | |
| "parameters": { | |
| "max_length": kwargs.get("max_length", 500), | |
| "temperature": kwargs.get("temperature", 0.2), | |
| "language": kwargs.get("language", "python"), | |
| **kwargs, | |
| }, | |
| } | |
| return await self._request(model_id, payload) | |
| async def code_completion( | |
| self, model_id: str, code: str, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Complete partial code""" | |
| payload = { | |
| "inputs": code, | |
| "parameters": { | |
| "max_length": kwargs.get("max_length", 100), | |
| "temperature": kwargs.get("temperature", 0.1), | |
| **kwargs, | |
| }, | |
| } | |
| return await self._request(model_id, payload) | |
| async def text_to_3d(self, model_id: str, prompt: str, **kwargs) -> Dict[str, Any]: | |
| """Generate 3D model from text description""" | |
| payload = { | |
| "inputs": prompt, | |
| "parameters": { | |
| "resolution": kwargs.get("resolution", 64), | |
| "format": kwargs.get("format", "obj"), | |
| **kwargs, | |
| }, | |
| } | |
| return await self._request(model_id, payload) | |
| async def image_to_3d( | |
| self, model_id: str, image_data: bytes, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Generate 3D model from image""" | |
| image_b64 = base64.b64encode(image_data).decode() | |
| payload = { | |
| "inputs": {"image": image_b64}, | |
| "parameters": kwargs, | |
| } | |
| return await self._request(model_id, payload) | |
| async def ocr(self, model_id: str, image_data: bytes, **kwargs) -> Dict[str, Any]: | |
| """Perform optical character recognition on image""" | |
| image_b64 = base64.b64encode(image_data).decode() | |
| payload = { | |
| "inputs": {"image": image_b64}, | |
| "parameters": {"language": kwargs.get("language", "en"), **kwargs}, | |
| } | |
| return await self._request(model_id, payload) | |
| async def document_analysis( | |
| self, model_id: str, document_data: bytes, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Analyze document structure and content""" | |
| doc_b64 = base64.b64encode(document_data).decode() | |
| payload = { | |
| "inputs": {"document": doc_b64}, | |
| "parameters": kwargs, | |
| } | |
| return await self._request(model_id, payload) | |
| async def vision_language( | |
| self, model_id: str, image_data: bytes, text: str, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Process image and text together""" | |
| image_b64 = base64.b64encode(image_data).decode() | |
| payload = { | |
| "inputs": {"image": image_b64, "text": text}, | |
| "parameters": kwargs, | |
| } | |
| return await self._request(model_id, payload) | |
| async def multimodal_reasoning( | |
| self, model_id: str, inputs: Dict[str, Any], **kwargs | |
| ) -> Dict[str, Any]: | |
| """Perform reasoning across multiple modalities""" | |
| payload = { | |
| "inputs": inputs, | |
| "parameters": kwargs, | |
| } | |
| return await self._request(model_id, payload) | |
| async def music_generation( | |
| self, model_id: str, prompt: str, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Generate music from text prompt""" | |
| payload = { | |
| "inputs": prompt, | |
| "parameters": { | |
| "duration": kwargs.get("duration", 30), | |
| "bpm": kwargs.get("bpm", 120), | |
| "genre": kwargs.get("genre", "electronic"), | |
| **kwargs, | |
| }, | |
| } | |
| return await self._request(model_id, payload) | |
| async def voice_cloning( | |
| self, model_id: str, text: str, voice_sample: bytes, **kwargs | |
| ) -> bytes: | |
| """Clone voice and synthesize speech""" | |
| voice_b64 = base64.b64encode(voice_sample).decode() | |
| payload = { | |
| "inputs": {"text": text, "voice_sample": voice_b64}, | |
| "parameters": kwargs, | |
| } | |
| return await self._request(model_id, payload, expect_json=False) | |
| async def super_resolution( | |
| self, model_id: str, image_data: bytes, **kwargs | |
| ) -> bytes: | |
| """Enhance image resolution""" | |
| image_b64 = base64.b64encode(image_data).decode() | |
| payload = { | |
| "inputs": {"image": image_b64}, | |
| "parameters": {"scale_factor": kwargs.get("scale_factor", 4), **kwargs}, | |
| } | |
| return await self._request(model_id, payload, expect_json=False) | |
| async def background_removal( | |
| self, model_id: str, image_data: bytes, **kwargs | |
| ) -> bytes: | |
| """Remove background from image""" | |
| image_b64 = base64.b64encode(image_data).decode() | |
| payload = { | |
| "inputs": {"image": image_b64}, | |
| "parameters": kwargs, | |
| } | |
| return await self._request(model_id, payload, expect_json=False) | |
| async def creative_writing( | |
| self, model_id: str, prompt: str, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Generate creative content""" | |
| payload = { | |
| "inputs": prompt, | |
| "parameters": { | |
| "max_length": kwargs.get("max_length", 1000), | |
| "creativity": kwargs.get("creativity", 0.8), | |
| "genre": kwargs.get("genre", "general"), | |
| **kwargs, | |
| }, | |
| } | |
| return await self._request(model_id, payload) | |
| async def business_document( | |
| self, model_id: str, document_type: str, context: str, **kwargs | |
| ) -> Dict[str, Any]: | |
| """Generate business documents""" | |
| payload = { | |
| "inputs": f"Generate {document_type}: {context}", | |
| "parameters": { | |
| "format": kwargs.get("format", "professional"), | |
| "length": kwargs.get("length", "medium"), | |
| **kwargs, | |
| }, | |
| } | |
| return await self._request(model_id, payload) | |
class HuggingFaceModelManager:
    """Manager for all Hugging Face model operations.

    Combines the static model catalogue (``HuggingFaceModels``) with
    ``HuggingFaceInference`` so callers can look models up by category or id
    and invoke them through a single entry point, ``call_model``.
    """

    # Routing table for call_model: HuggingFaceInference method name -> names
    # of the ModelCategory members served by that method. Keyed by member
    # *name* so building the table does not depend on which members the enum
    # actually defines; a name with no matching member is simply never hit.
    # NOTE(review): matching by ``category.name`` assumes ModelCategory has no
    # aliased (duplicate-value) members - confirm against the enum.
    _HANDLER_CATEGORIES = {
        "text_generation": (
            "TEXT_GENERATION",
            # Creative content that uses standard text generation
            "BLOG_WRITING",
            "MARKETING_COPY",
            # Science and research
            "PROTEIN_FOLDING",
            "MOLECULE_GENERATION",
            "SCIENTIFIC_WRITING",
            "RESEARCH_ASSISTANCE",
            "DATA_ANALYSIS",
            # AI teacher and education
            "AI_TUTORING",
            "EDUCATIONAL_CONTENT",
            "LESSON_PLANNING",
            "CONCEPT_EXPLANATION",
            "HOMEWORK_ASSISTANCE",
            "QUIZ_GENERATION",
            "CURRICULUM_DESIGN",
            "LEARNING_ASSESSMENT",
            "ADAPTIVE_LEARNING",
            "SUBJECT_TEACHING",
            "MATH_TUTORING",
            "SCIENCE_TUTORING",
            "LANGUAGE_TUTORING",
            "HISTORY_TUTORING",
            "CODING_INSTRUCTION",
            "EXAM_PREPARATION",
            "STUDY_GUIDE_CREATION",
            "EDUCATIONAL_GAMES",
            "LEARNING_ANALYTICS",
            "PERSONALIZED_LEARNING",
            # Qwen and DeepSeek text models
            "QWEN_REASONING",
            "QWEN_MATH",
            "QWEN_CODE",
            "DEEPSEEK_CODING",
            "DEEPSEEK_REASONING",
            "DEEPSEEK_MATH",
            "DEEPSEEK_RESEARCH",
        ),
        "text_to_image": (
            "TEXT_TO_IMAGE",
            # Image processing
            "IMAGE_EDITING",
            "PORTRAIT_EDITING",
            "PHOTO_RESTORATION",
            "COLOR_CORRECTION",
            "ARTISTIC_FILTER",
            # Face manipulation
            "FACE_SWAP",
            "FACE_ENHANCEMENT",
            "FACE_GENERATION",
        ),
        "automatic_speech_recognition": (
            "AUTOMATIC_SPEECH_RECOGNITION",
            "QWEN_AUDIO",
            "ADVANCED_STT",
            "MULTILINGUAL_STT",
            "SPEECH_ENHANCEMENT",
        ),
        "text_to_speech": (
            "TEXT_TO_SPEECH",
            "ADVANCED_TTS",
            "MULTILINGUAL_TTS",
            "VOICE_CONVERSION",
            "AUDIO_GENERATION",
            "REAL_TIME_TRANSLATION",
        ),
        # Video classification is routed like image classification.
        "image_classification": ("IMAGE_CLASSIFICATION", "VIDEO_CLASSIFICATION"),
        "feature_extraction": ("FEATURE_EXTRACTION",),
        "translation": ("TRANSLATION",),
        "summarization": ("SUMMARIZATION",),
        "question_answering": ("QUESTION_ANSWERING",),
        "zero_shot_classification": ("ZERO_SHOT_CLASSIFICATION",),
        "conversational": (
            "CONVERSATIONAL",
            "INTERACTIVE_CHAT",
            "BILINGUAL_CONVERSATION",
            "CULTURAL_ADAPTATION",
            "CONTEXT_AWARE_CHAT",
            "PERSONALITY_CHAT",
            "ROLE_PLAY_CHAT",
            "DOMAIN_SPECIFIC_CHAT",
        ),
        "text_to_video": (
            "TEXT_TO_VIDEO",
            "VIDEO_GENERATION",
            # Interactive avatar and video generation
            "TALKING_AVATAR",
            "AVATAR_GENERATION",
            "LIP_SYNC",
            "FACIAL_ANIMATION",
            "GESTURE_GENERATION",
            "VIRTUAL_PRESENTER",
            "AI_ANCHOR",
        ),
        "video_to_text": ("VIDEO_TO_TEXT",),
        "code_generation": ("CODE_GENERATION", "APP_GENERATION"),
        "code_completion": ("CODE_COMPLETION", "CODE_EXPLANATION"),
        "text_to_3d": ("TEXT_TO_3D", "THREE_D_GENERATION"),
        "image_to_3d": ("IMAGE_TO_3D", "MESH_GENERATION"),
        # Handwriting recognition is routed like OCR.
        "ocr": ("OCR", "HANDWRITING_RECOGNITION"),
        "document_analysis": (
            "DOCUMENT_ANALYSIS",
            "FORM_PROCESSING",
            "TABLE_EXTRACTION",
            "LAYOUT_ANALYSIS",
        ),
        "vision_language": (
            "VISION_LANGUAGE",
            "VISUAL_QUESTION_ANSWERING",
            "IMAGE_TEXT_MATCHING",
            "QWEN_VISION",
        ),
        "multimodal_reasoning": (
            "MULTIMODAL_REASONING",
            "MULTIMODAL_CHAT",
            "CROSS_MODAL_GENERATION",
        ),
        "music_generation": ("MUSIC_GENERATION",),
        "voice_cloning": ("VOICE_CLONING",),
        "super_resolution": (
            "SUPER_RESOLUTION",
            "FACE_RESTORATION",
            "IMAGE_INPAINTING",
            "IMAGE_OUTPAINTING",
            "IMAGE_UPSCALING",
        ),
        "background_removal": ("BACKGROUND_REMOVAL",),
        "creative_writing": (
            "CREATIVE_WRITING",
            "STORY_GENERATION",
            "POETRY_GENERATION",
            "SCREENPLAY_WRITING",
            # Game development
            "CHARACTER_GENERATION",
            "LEVEL_GENERATION",
            "DIALOGUE_GENERATION",
            "GAME_ASSET_GENERATION",
        ),
        # Called with the category's value as the document type.
        "business_document": (
            "EMAIL_GENERATION",
            "PRESENTATION_CREATION",
            "REPORT_GENERATION",
            "MEETING_SUMMARIZATION",
            "PROJECT_PLANNING",
        ),
    }

    def __init__(self, api_token: str):
        """Store the API token and instantiate the model catalogue."""
        self.api_token = api_token
        self.models = HuggingFaceModels()

    def get_models_by_category(self, category: ModelCategory) -> List[HFModel]:
        """Get all models for a specific category.

        Delegates to ``get_all_models`` so both lookups always agree; a
        category that has no catalogue entry yields an empty list.
        """
        return self.get_all_models().get(category, [])

    def get_all_models(self) -> Dict[ModelCategory, List[HFModel]]:
        """Get all available models organized by category.

        Several categories share one catalogue list; the returned dict maps
        each category to that shared list object.
        """
        cat = ModelCategory
        catalogue = self.models
        # (model list, categories served by it), in the order the categories
        # should appear in the resulting dict.
        groups = (
            # Core AI categories
            (catalogue.TEXT_GENERATION_MODELS, (cat.TEXT_GENERATION,)),
            (catalogue.TEXT_TO_IMAGE_MODELS, (cat.TEXT_TO_IMAGE,)),
            (catalogue.ASR_MODELS, (cat.AUTOMATIC_SPEECH_RECOGNITION,)),
            (catalogue.TTS_MODELS, (cat.TEXT_TO_SPEECH,)),
            (catalogue.IMAGE_CLASSIFICATION_MODELS, (cat.IMAGE_CLASSIFICATION,)),
            (catalogue.FEATURE_EXTRACTION_MODELS, (cat.FEATURE_EXTRACTION,)),
            (catalogue.TRANSLATION_MODELS, (cat.TRANSLATION,)),
            (catalogue.SUMMARIZATION_MODELS, (cat.SUMMARIZATION,)),
            # Video and Motion
            (
                catalogue.VIDEO_GENERATION_MODELS,
                (
                    cat.TEXT_TO_VIDEO,
                    cat.VIDEO_GENERATION,
                    cat.VIDEO_TO_TEXT,
                    cat.VIDEO_CLASSIFICATION,
                ),
            ),
            # Code and Development
            (
                catalogue.CODE_GENERATION_MODELS,
                (
                    cat.CODE_GENERATION,
                    cat.CODE_COMPLETION,
                    cat.CODE_EXPLANATION,
                    cat.APP_GENERATION,
                ),
            ),
            # 3D and AR/VR
            (
                catalogue.THREE_D_MODELS,
                (
                    cat.TEXT_TO_3D,
                    cat.IMAGE_TO_3D,
                    cat.THREE_D_GENERATION,
                    cat.MESH_GENERATION,
                ),
            ),
            # Document Processing
            (
                catalogue.DOCUMENT_PROCESSING_MODELS,
                (
                    cat.OCR,
                    cat.DOCUMENT_ANALYSIS,
                    cat.HANDWRITING_RECOGNITION,
                    cat.TABLE_EXTRACTION,
                    cat.FORM_PROCESSING,
                ),
            ),
            # Multimodal AI
            (
                catalogue.MULTIMODAL_MODELS,
                (
                    cat.VISION_LANGUAGE,
                    cat.MULTIMODAL_REASONING,
                    cat.VISUAL_QUESTION_ANSWERING,
                    cat.MULTIMODAL_CHAT,
                    cat.CROSS_MODAL_GENERATION,
                ),
            ),
            # Specialized AI
            (
                catalogue.SPECIALIZED_AI_MODELS,
                (
                    cat.MUSIC_GENERATION,
                    cat.VOICE_CLONING,
                    cat.SUPER_RESOLUTION,
                    cat.FACE_RESTORATION,
                    cat.IMAGE_INPAINTING,
                    cat.BACKGROUND_REMOVAL,
                ),
            ),
            # Creative Content
            (
                catalogue.CREATIVE_CONTENT_MODELS,
                (
                    cat.CREATIVE_WRITING,
                    cat.STORY_GENERATION,
                    cat.POETRY_GENERATION,
                    cat.BLOG_WRITING,
                    cat.MARKETING_COPY,
                ),
            ),
            # Game Development
            (
                catalogue.GAME_DEVELOPMENT_MODELS,
                (
                    cat.GAME_ASSET_GENERATION,
                    cat.CHARACTER_GENERATION,
                    cat.LEVEL_GENERATION,
                    cat.DIALOGUE_GENERATION,
                ),
            ),
            # Science and Research
            (
                catalogue.SCIENCE_RESEARCH_MODELS,
                (
                    cat.PROTEIN_FOLDING,
                    cat.MOLECULE_GENERATION,
                    cat.SCIENTIFIC_WRITING,
                    cat.RESEARCH_ASSISTANCE,
                    cat.DATA_ANALYSIS,
                ),
            ),
            # Business and Productivity
            (
                catalogue.BUSINESS_PRODUCTIVITY_MODELS,
                (
                    cat.EMAIL_GENERATION,
                    cat.PRESENTATION_CREATION,
                    cat.REPORT_GENERATION,
                    cat.MEETING_SUMMARIZATION,
                    cat.PROJECT_PLANNING,
                ),
            ),
            # AI Teacher and Education Models
            (
                catalogue.AI_TEACHER_MODELS,
                (
                    cat.AI_TUTORING,
                    cat.EDUCATIONAL_CONTENT,
                    cat.LESSON_PLANNING,
                    cat.CONCEPT_EXPLANATION,
                    cat.HOMEWORK_ASSISTANCE,
                    cat.QUIZ_GENERATION,
                    cat.CURRICULUM_DESIGN,
                    cat.LEARNING_ASSESSMENT,
                    cat.ADAPTIVE_LEARNING,
                    cat.SUBJECT_TEACHING,
                    cat.MATH_TUTORING,
                    cat.SCIENCE_TUTORING,
                    cat.LANGUAGE_TUTORING,
                    cat.HISTORY_TUTORING,
                    cat.CODING_INSTRUCTION,
                    cat.EXAM_PREPARATION,
                    cat.STUDY_GUIDE_CREATION,
                    cat.EDUCATIONAL_GAMES,
                    cat.LEARNING_ANALYTICS,
                    cat.PERSONALIZED_LEARNING,
                ),
            ),
            # Qwen Models
            (
                catalogue.QWEN_MODELS,
                (
                    cat.QWEN_REASONING,
                    cat.QWEN_MATH,
                    cat.QWEN_CODE,
                    cat.QWEN_VISION,
                    cat.QWEN_AUDIO,
                ),
            ),
            # DeepSeek Models
            (
                catalogue.DEEPSEEK_MODELS,
                (
                    cat.DEEPSEEK_CODING,
                    cat.DEEPSEEK_REASONING,
                    cat.DEEPSEEK_MATH,
                    cat.DEEPSEEK_RESEARCH,
                ),
            ),
            # Advanced Image Processing & Manipulation
            (catalogue.IMAGE_EDITING_MODELS, (cat.IMAGE_EDITING,)),
            (
                catalogue.FACE_SWAP_MODELS,
                (cat.FACE_SWAP, cat.FACE_ENHANCEMENT, cat.FACE_GENERATION),
            ),
            (
                catalogue.IMAGE_EDITING_MODELS,
                (
                    cat.PORTRAIT_EDITING,
                    cat.PHOTO_RESTORATION,
                    cat.IMAGE_UPSCALING,
                    cat.COLOR_CORRECTION,
                    cat.ARTISTIC_FILTER,
                ),
            ),
            # Advanced Speech & Audio
            (
                catalogue.ADVANCED_SPEECH_MODELS,
                (
                    cat.ADVANCED_TTS,
                    cat.ADVANCED_STT,
                    cat.VOICE_CONVERSION,
                    cat.SPEECH_ENHANCEMENT,
                    cat.AUDIO_GENERATION,
                    cat.MULTILINGUAL_TTS,
                    cat.MULTILINGUAL_STT,
                    cat.REAL_TIME_TRANSLATION,
                ),
            ),
            # Interactive Avatar & Video Generation
            (
                catalogue.TALKING_AVATAR_MODELS,
                (
                    cat.TALKING_AVATAR,
                    cat.AVATAR_GENERATION,
                    cat.LIP_SYNC,
                    cat.FACIAL_ANIMATION,
                    cat.GESTURE_GENERATION,
                    cat.VIRTUAL_PRESENTER,
                    cat.AI_ANCHOR,
                ),
            ),
            # Interactive Language & Conversation
            (
                catalogue.INTERACTIVE_LANGUAGE_MODELS,
                (
                    cat.INTERACTIVE_CHAT,
                    cat.BILINGUAL_CONVERSATION,
                    cat.CULTURAL_ADAPTATION,
                    cat.CONTEXT_AWARE_CHAT,
                    cat.PERSONALITY_CHAT,
                    cat.ROLE_PLAY_CHAT,
                    cat.DOMAIN_SPECIFIC_CHAT,
                ),
            ),
        )
        return {
            category: models
            for models, categories in groups
            for category in categories
        }

    def get_model_by_id(self, model_id: str) -> Optional[HFModel]:
        """Find a model by its Hugging Face model ID, or return ``None``."""
        return next(
            (
                model
                for models_list in self.get_all_models().values()
                for model in models_list
                if model.model_id == model_id
            ),
            None,
        )

    @classmethod
    def _handler_name_for(cls, category: Any) -> Optional[str]:
        """Return the HuggingFaceInference method name serving ``category``.

        Returns ``None`` when ``category`` is not a ``ModelCategory`` member
        or no handler is registered for it.
        """
        if not isinstance(category, ModelCategory):
            return None
        for handler_name, category_names in cls._HANDLER_CATEGORIES.items():
            if category.name in category_names:
                return handler_name
        return None

    async def call_model(self, model_id: str, category: ModelCategory, **kwargs) -> Any:
        """Call a Hugging Face model with the appropriate method based on category.

        Args:
            model_id: Hugging Face model identifier.
            category: Category that selects which ``HuggingFaceInference``
                method is invoked.
            **kwargs: Forwarded to the selected method. For the business
                categories the category's value is passed as the document
                type, so ``kwargs`` must supply ``context``.

        Returns:
            Whatever the selected ``HuggingFaceInference`` method returns.

        Raises:
            ValueError: If no handler is registered for ``category``.

        Note:
            A fresh inference session is opened per call and closed as soon
            as this method returns, so a result that is consumed lazily
            cannot be read afterwards.
        """
        handler_name = self._handler_name_for(category)
        if handler_name is None:
            # Reject unknown categories before opening an HTTP session.
            raise ValueError(f"Unsupported model category: {category}")

        async with HuggingFaceInference(self.api_token) as hf:
            handler = getattr(hf, handler_name)
            if handler_name == "business_document":
                return await handler(model_id, category.value, **kwargs)
            return await handler(model_id, **kwargs)