# Nihal2000's picture
# Gradio mcp
# 9145e48
# raw
# history blame
# 3.95 kB
from pydantic import BaseModel, Field
from typing import List, Optional, Dict, Any
from datetime import datetime
from enum import Enum
class DocumentType(str, Enum):
    """Document format identifiers.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (for example ``DocumentType.PDF == "pdf"``).
    """

    PDF = "pdf"
    TEXT = "txt"  # note: the value is "txt", not "text"
    DOCX = "docx"
    IMAGE = "image"
    HTML = "html"
class ProcessingStatus(str, Enum):
    """Status values for a processing task.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (for example ``ProcessingStatus.FAILED == "failed"``).
    """

    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
class Document(BaseModel):
    """A document record: identity, extracted text, and descriptive annotations."""

    # Required fields.
    id: str = Field(..., description="Unique document identifier")
    filename: str = Field(..., description="Original filename")
    content: str = Field(..., description="Extracted text content")
    doc_type: DocumentType = Field(..., description="Document type")
    file_size: int = Field(..., description="File size in bytes")

    # Fields filled in per instance when omitted.
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; moving to an aware UTC timestamp would change the
    # isoformat() output produced by to_dict — confirm with consumers first.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    metadata: Dict[str, Any] = Field(default_factory=dict)
    tags: List[str] = Field(default_factory=list)

    # Optional annotations; None until set.
    summary: Optional[str] = None
    category: Optional[str] = None
    language: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the document as a dict with a shortened content preview.

        Content longer than 500 characters is cut to its first 500 characters
        followed by "..."; shorter content is passed through unchanged.
        ``created_at`` is rendered with ``isoformat()``. ``doc_type`` is
        returned as the enum member, and ``metadata`` / ``tags`` are the
        model's own objects rather than copies.
        """
        preview = self.content
        if len(preview) > 500:
            preview = preview[:500] + "..."

        return {
            "id": self.id,
            "filename": self.filename,
            "content": preview,
            "doc_type": self.doc_type,
            "file_size": self.file_size,
            "created_at": self.created_at.isoformat(),
            "metadata": self.metadata,
            "tags": self.tags,
            "summary": self.summary,
            "category": self.category,
            "language": self.language,
        }
class Chunk(BaseModel):
    """A piece of a document's text, with its location and an optional embedding."""

    # Identity and parent linkage.
    id: str = Field(..., description="Unique chunk identifier")
    document_id: str = Field(..., description="Parent document ID")

    # Text and where it sits in the source document.
    # NOTE(review): the unit of start_pos/end_pos (characters vs. bytes) and
    # whether end_pos is inclusive are not shown here — confirm with the chunker.
    content: str = Field(..., description="Chunk text content")
    chunk_index: int = Field(..., description="Position in document")
    start_pos: int = Field(..., description="Start position in original document")
    end_pos: int = Field(..., description="End position in original document")

    # Optional vector and free-form extras.
    embedding: Optional[List[float]] = None
    metadata: Dict[str, Any] = Field(default_factory=dict)
class SearchResult(BaseModel):
    """A scored search hit that references a chunk and its source document."""

    chunk_id: str = Field(..., description="Matching chunk ID")
    document_id: str = Field(..., description="Source document ID")
    content: str = Field(..., description="Matching content")
    score: float = Field(..., description="Similarity score")
    metadata: Dict[str, Any] = Field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a dict of its five fields, in declaration order.

        Values are passed through as-is; ``metadata`` is the model's own dict,
        not a copy.
        """
        exported = ("chunk_id", "document_id", "content", "score", "metadata")
        return {key: getattr(self, key) for key in exported}
class ProcessingTask(BaseModel):
    """State of a processing task: status, progress, and timestamps."""

    task_id: str = Field(..., description="Unique task identifier")
    document_id: Optional[str] = None

    # New tasks start as PENDING at 0.0 progress.
    status: ProcessingStatus = ProcessingStatus.PENDING
    # Validated to lie within [0.0, 100.0]; presumably a percentage — confirm.
    progress: float = Field(0.0, ge=0.0, le=100.0)

    message: Optional[str] = None
    error: Optional[str] = None

    # Both timestamps default to the time of construction (each factory call
    # is separate, so the two values may differ slightly). Nothing in this
    # model refreshes updated_at afterwards.
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12 — confirm before changing, as stored values may be naive.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    updated_at: datetime = Field(default_factory=datetime.utcnow)
class SummaryRequest(BaseModel):
    """Parameters for a summary request.

    Both ``content`` and ``document_id`` are optional and this model does not
    enforce that either is set; presumably the caller supplies one of them —
    confirm against the handler.
    """

    content: Optional[str] = None
    document_id: Optional[str] = None
    style: str = Field("concise", description="Summary style")
    # No bound is validated here; None means no limit was supplied.
    max_length: Optional[int] = None
class TagGenerationRequest(BaseModel):
    """Parameters for a tag-generation request.

    Both ``content`` and ``document_id`` are optional and this model does not
    enforce that either is set; presumably the caller supplies one of them —
    confirm against the handler.
    """

    content: Optional[str] = None
    document_id: Optional[str] = None
    # Defaults to 5 and is validated to lie within 1..20 inclusive.
    max_tags: int = Field(5, ge=1, le=20)
class QuestionAnswerRequest(BaseModel):
    """Parameters for a question-answering request."""

    question: str = Field(..., description="Question to answer")
    # Optional free-form filter; its expected keys are not defined here.
    context_filter: Optional[Dict[str, Any]] = None
    # Defaults to 2000 with no range validation.
    # NOTE(review): unit (characters vs. tokens) is not shown — confirm.
    max_context_length: int = Field(2000)
class CategorizationRequest(BaseModel):
    """Parameters for a categorization request.

    Every field is optional and this model does not enforce that either
    ``content`` or ``document_id`` is set; presumably the caller supplies one
    of them — confirm against the handler.
    """

    content: Optional[str] = None
    document_id: Optional[str] = None
    # Presumably the candidate categories to choose from — confirm.
    categories: Optional[List[str]] = None