"""API models for Dots.OCR text extraction service.

This module defines the data structures used for API requests, responses,
and internal data processing.
"""

from typing import List, Optional, Dict, Any

from pydantic import BaseModel, Field

# NOTE: class docstrings in this module are kept verbatim on purpose --
# pydantic exposes a model's docstring as its JSON-schema description, so
# rewording one changes the published schema. Extra explanation therefore
# lives in `#` comments.


# Axis-aligned box. Each coordinate is validated independently to lie in
# [0.0, 1.0]; nothing here enforces x1 <= x2 or y1 <= y2.
class BoundingBox(BaseModel):
    """Normalized bounding box coordinates."""

    x1: float = Field(
        default=..., ge=0.0, le=1.0, description="Top-left x coordinate"
    )
    y1: float = Field(
        default=..., ge=0.0, le=1.0, description="Top-left y coordinate"
    )
    x2: float = Field(
        default=..., ge=0.0, le=1.0, description="Bottom-right x coordinate"
    )
    y2: float = Field(
        default=..., ge=0.0, le=1.0, description="Bottom-right y coordinate"
    )


# One extracted value plus its provenance. `field_name`, `confidence` and
# `source` are required; `value` may be omitted and then defaults to None.
class ExtractedField(BaseModel):
    """Individual extracted field with confidence and source."""

    field_name: str = Field(default=..., description="Standardized field name")
    value: Optional[str] = Field(
        default=None, description="Extracted field value"
    )
    confidence: float = Field(
        default=..., ge=0.0, le=1.0, description="Extraction confidence"
    )
    source: str = Field(
        default=..., description="Extraction source (e.g., 'ocr')"
    )


# Every attribute is an optional ExtractedField defaulting to None, so an
# instance can be built with no arguments at all.
class IdCardFields(BaseModel):
    """Structured fields extracted from identity documents."""

    # Document Information
    document_number: Optional[ExtractedField] = Field(
        default=None, description="Document number/ID"
    )
    document_type: Optional[ExtractedField] = Field(
        default=None, description="Type of document"
    )
    issuing_country: Optional[ExtractedField] = Field(
        default=None, description="Issuing country code"
    )
    issuing_authority: Optional[ExtractedField] = Field(
        default=None, description="Issuing authority"
    )

    # Personal Information
    surname: Optional[ExtractedField] = Field(
        default=None, description="Family name/surname"
    )
    given_names: Optional[ExtractedField] = Field(
        default=None, description="Given names"
    )
    nationality: Optional[ExtractedField] = Field(
        default=None, description="Nationality code"
    )
    date_of_birth: Optional[ExtractedField] = Field(
        default=None, description="Date of birth"
    )
    gender: Optional[ExtractedField] = Field(
        default=None, description="Gender"
    )
    place_of_birth: Optional[ExtractedField] = Field(
        default=None, description="Place of birth"
    )

    # Validity Information
    date_of_issue: Optional[ExtractedField] = Field(
        default=None, description="Date of issue"
    )
    date_of_expiry: Optional[ExtractedField] = Field(
        default=None, description="Date of expiry"
    )
    personal_number: Optional[ExtractedField] = Field(
        default=None, description="Personal number"
    )

    # Additional fields for specific document types
    optional_data_1: Optional[ExtractedField] = Field(
        default=None, description="Optional data field 1"
    )
    optional_data_2: Optional[ExtractedField] = Field(
        default=None, description="Optional data field 2"
    )


# Declares the same fifteen attribute names, in the same order, as
# IdCardFields, but as plain `= None` defaults with no per-field
# descriptions. This is the type OCRDetection.extracted_fields uses.
class ExtractedFields(BaseModel):
    """All extracted fields from identity document."""

    # Document information
    document_number: Optional[ExtractedField] = None
    document_type: Optional[ExtractedField] = None
    issuing_country: Optional[ExtractedField] = None
    issuing_authority: Optional[ExtractedField] = None

    # Personal information
    surname: Optional[ExtractedField] = None
    given_names: Optional[ExtractedField] = None
    nationality: Optional[ExtractedField] = None
    date_of_birth: Optional[ExtractedField] = None
    gender: Optional[ExtractedField] = None
    place_of_birth: Optional[ExtractedField] = None

    # Validity information
    date_of_issue: Optional[ExtractedField] = None
    date_of_expiry: Optional[ExtractedField] = None
    personal_number: Optional[ExtractedField] = None

    # Additional fields for specific document types
    optional_data_1: Optional[ExtractedField] = None
    optional_data_2: Optional[ExtractedField] = None


# All values are plain optional strings (not ExtractedField wrappers);
# `confidence` is the only non-optional attribute and defaults to 0.0.
class MRZData(BaseModel):
    """Machine Readable Zone data."""

    # Primary canonical fields
    document_type: Optional[str] = Field(
        default=None, description="MRZ document type (TD1|TD2|TD3)"
    )
    issuing_country: Optional[str] = Field(
        default=None, description="Issuing country code"
    )
    surname: Optional[str] = Field(
        default=None, description="Surname from MRZ"
    )
    given_names: Optional[str] = Field(
        default=None, description="Given names from MRZ"
    )
    document_number: Optional[str] = Field(
        default=None, description="Document number from MRZ"
    )
    nationality: Optional[str] = Field(
        default=None, description="Nationality code from MRZ"
    )
    date_of_birth: Optional[str] = Field(
        default=None, description="Date of birth from MRZ"
    )
    gender: Optional[str] = Field(
        default=None, description="Gender from MRZ"
    )
    date_of_expiry: Optional[str] = Field(
        default=None, description="Date of expiry from MRZ"
    )
    personal_number: Optional[str] = Field(
        default=None, description="Personal number from MRZ"
    )
    raw_mrz: Optional[str] = Field(default=None, description="Raw MRZ text")
    confidence: float = Field(
        default=0.0, ge=0.0, le=1.0, description="MRZ extraction confidence"
    )

    # Backwards compatibility fields (some older code/tests expect these names)
    # These duplicate information from the canonical fields above.
    # They are declared as separate, independent fields: nothing in this
    # class copies document_type -> format_type or raw_mrz -> raw_text, so
    # whoever builds an MRZData must populate both if both are needed.
    format_type: Optional[str] = Field(
        default=None,
        description="Alias of document_type for backward compatibility",
    )
    raw_text: Optional[str] = Field(
        default=None,
        description="Alias of raw_mrz for backward compatibility",
    )


# `extracted_fields` is required; `mrz_data` is optional and defaults to None.
class OCRDetection(BaseModel):
    """Single OCR detection result."""

    mrz_data: Optional[MRZData] = Field(
        default=None, description="MRZ data if detected"
    )
    extracted_fields: ExtractedFields = Field(
        default=..., description="Extracted field data"
    )


# Top-level response envelope; all four attributes are required.
class OCRResponse(BaseModel):
    """OCR API response."""

    request_id: str = Field(
        default=..., description="Unique request identifier"
    )
    media_type: str = Field(default=..., description="Media type processed")
    processing_time: float = Field(
        default=..., description="Processing time in seconds"
    )
    detections: List[OCRDetection] = Field(
        default=..., description="List of OCR detections"
    )