tex2lab

Paused

File size: 7,942 Bytes

3e6b063

# controller/pix2text_bp.py
import os
import cv2
from flask import Blueprint, render_template, request, jsonify
from pix2text import Pix2Text
from utils.math_solver import solve_equation
from controller.models.camera_to_latex import camera_to_latex

# Load the Pix2Text model a single time at import so every request reuses it.
print("🔹 Loading Pix2Text model (mfd)...")
try:
    p2t = Pix2Text(analyzer_config={'model_name': 'mfd'})
    print("✅ Pix2Text model loaded successfully.")
except Exception as init_error:
    # Keep the module importable; the routes check `p2t` before using it.
    print(f"❌ Pix2Text failed to initialize: {init_error}")
    p2t = None

# Blueprint that the math and camera routes in this module attach to.
pix2text_bp = Blueprint('pix2text_bp', __name__)

# Uploaded images are saved here; create the directory if it is missing.
UPLOAD_FOLDER = 'static/uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Optional preprocessing
# Binarized copies are written here, kept apart from the raw uploads.
PROCESSED_FOLDER = 'static/processed'
os.makedirs(PROCESSED_FOLDER, exist_ok=True)


def preprocess_image(image_path):
    """Write a binarized copy of an image to give OCR a cleaner input.

    The image is converted to grayscale, blurred with a 3x3 Gaussian kernel
    and adaptively thresholded (Gaussian method, block size 15, constant 2).
    The result is saved in ``PROCESSED_FOLDER`` as ``<name>_processed<ext>``.

    Args:
        image_path: Path of the image file to preprocess.

    Returns:
        The path of the processed image, or ``image_path`` unchanged when
        any step fails (preprocessing is best-effort and never raises).
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError("Could not read image")

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Mild blur to reduce noise before thresholding.
        blurred = cv2.GaussianBlur(gray, (3, 3), 0)

        thresh = cv2.adaptiveThreshold(
            blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, 2
        )

        # Split off only the real extension so names containing several dots
        # (e.g. "scan.v2.png") get a single "_processed" suffix. Fall back to
        # PNG when there is no extension, since imwrite picks its encoder
        # from the extension.
        stem, ext = os.path.splitext(os.path.basename(image_path))
        processed_path = os.path.join(
            PROCESSED_FOLDER, f"{stem}_processed{ext or '.png'}"
        )

        # imwrite signals some failures by returning False instead of raising.
        if not cv2.imwrite(processed_path, thresh):
            raise ValueError("Could not write processed image")

        return processed_path
    except Exception as e:
        print(f"Preprocessing error: {e}")
        return image_path  # Fall back to the original image

# -----------------------------
# Math Routes
# -----------------------------
@pix2text_bp.route("/math")
def math_page():
    """Render the ``math.html`` template for the ``/math`` route."""
    page = render_template("math.html")
    return page

@pix2text_bp.route("/math/process", methods=["POST"])
def process_math_image():
    """Save an uploaded image and return the text Pix2Text recognizes in it.

    Reads the upload from the ``image`` field of ``request.files``, stores it
    in ``UPLOAD_FOLDER``, runs ``preprocess_image`` on it and passes the
    resulting path to the global ``p2t`` model.

    Returns:
        A JSON response with ``success``, ``latex`` and ``image_path``.
        A ``({'error': ...}, 400)`` response when the upload is missing or
        its file name is unusable, and ``({'error': ...}, 500)`` when any
        other exception is raised.
    """
    try:
        if 'image' not in request.files:
            return jsonify({'error': 'No image file provided'}), 400

        file = request.files['image']
        if not file.filename:
            return jsonify({'error': 'No file selected'}), 400

        # The file name is client-controlled: keep only its last path
        # component (for both "/" and "\" separators) so a value such as
        # "../../app.py" cannot be written outside UPLOAD_FOLDER.
        filename = os.path.basename(file.filename.replace('\\', '/'))
        if filename in ('', '.', '..'):
            return jsonify({'error': 'Invalid file name'}), 400

        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)

        # Preprocess (optional); falls back to the original path on failure.
        processed_path = preprocess_image(filepath)

        # Run Pix2Text. Compare against None explicitly rather than relying
        # on the truthiness of the model object.
        if p2t is not None:
            result = p2t.recognize(processed_path)
            if isinstance(result, dict):
                latex = result.get('text', '')
            elif isinstance(result, list) and result and isinstance(result[0], dict):
                # Only the first entry of a list result is used.
                latex = result[0].get('text', '')
            else:
                latex = str(result)
        else:
            latex = "\\text{Pix2Text not initialized}"

        return jsonify({
            'success': True,
            'latex': latex,
            'image_path': filepath
        })

    except Exception as e:
        print(f"❌ Error in /math/process: {e}")
        return jsonify({'error': str(e)}), 500

@pix2text_bp.route("/math/solve", methods=["POST"])
def solve_math_equation():
    """Solve the equation posted as JSON under the ``latex`` key.

    Returns:
        A JSON response with ``success`` and ``solution`` (the value returned
        by ``solve_equation``). A ``({'error': ...}, 400)`` response when the
        body is not a JSON object containing ``latex``, and
        ``({'error': ...}, 500)`` when solving raises.
    """
    try:
        # silent=True returns None for a missing, mistyped or malformed JSON
        # body instead of raising, so bad requests get the 400 below rather
        # than being reported as a server error.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'latex' not in data:
            return jsonify({'error': 'No equation provided'}), 400

        solution = solve_equation(data['latex'])
        return jsonify({'success': True, 'solution': solution})

    except Exception as e:
        print(f"❌ Error in /math/solve: {e}")
        return jsonify({'error': str(e)}), 500

# -----------------------------
# Camera Routes
# -----------------------------
# @pix2text_bp.route("/camera")
# def camera_page():
#     return render_template("camera.html")

@pix2text_bp.route("/camera")
def camera_page():
    """Render the ``camera.html`` template for the ``/camera`` route."""
    page = render_template("camera.html")
    return page


@pix2text_bp.route("/camera/solve", methods=["POST"])
def solve_camera_equation():
    """Solve the equation posted as JSON under the ``latex`` key.

    Returns:
        A JSON response with ``success`` and ``solution`` (the value returned
        by ``solve_equation``). A ``({'error': ...}, 400)`` response when the
        body is not a non-empty JSON object or ``latex`` is missing or empty,
        and ``({'error': ...}, 500)`` when solving raises.
    """
    try:
        # silent=True returns None for a missing, mistyped or malformed JSON
        # body instead of raising, so bad requests get a 400 rather than
        # being reported as a server error.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or not data:
            return jsonify({'error': 'No data provided'}), 400

        latex_equation = data.get('latex', '')
        if not latex_equation:
            return jsonify({'error': 'No equation provided'}), 400

        solution = solve_equation(latex_equation)

        return jsonify({
            'success': True,
            'solution': solution
        })

    except Exception as e:
        print(f"❌ Error in /camera/solve: {e}")
        return jsonify({'error': str(e)}), 500


def _latex_from_result(result):
    """Return the recognized text from a Pix2Text result as a string."""
    if isinstance(result, list) and result and isinstance(result[0], dict):
        # Only the first entry of a list result is used.
        result = result[0]
    if isinstance(result, dict):
        text = result.get('text', '')
        # Guard against a None/non-string value so callers can call .strip().
        return '' if text is None else str(text)
    return str(result)


@pix2text_bp.route("/camera/process", methods=["POST"])
def process_camera_image():
    """Save a camera capture and return the text Pix2Text recognizes in it.

    Reads the upload from the ``image`` field of ``request.files`` and stores
    it in ``UPLOAD_FOLDER``. The original image is recognized first; only
    when that yields fewer than two non-blank characters is the image run
    through ``preprocess_image`` and recognized again.

    Returns:
        A JSON response with ``success``, ``latex`` and ``image_path``.
        A ``({'error': ...}, 400)`` response when the upload is missing or
        its file name is unusable, and ``({'error': ...}, 500)`` when any
        other exception is raised.
    """
    try:
        if 'image' not in request.files:
            return jsonify({'error': 'No image file provided'}), 400

        file = request.files['image']
        if not file.filename:
            return jsonify({'error': 'No image file selected'}), 400

        # The file name is client-controlled: keep only its last path
        # component (for both "/" and "\" separators) so a value such as
        # "../../app.py" cannot be written outside UPLOAD_FOLDER.
        filename = os.path.basename(file.filename.replace('\\', '/'))
        if filename in ('', '.', '..'):
            return jsonify({'error': 'Invalid file name'}), 400

        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)

        if p2t is None:
            latex_code = "\\text{Pix2Text not available}"
        else:
            # Try the untouched capture first, as preprocessing might
            # distort mathematical symbols.
            print(f"Processing image: {filepath}")
            result = p2t.recognize(filepath)
            print(f"Raw result: {result}")
            latex_code = _latex_from_result(result)

            # Nothing useful recognized: retry on the preprocessed image.
            if len(latex_code.strip()) < 2:
                print("Result too short, trying with preprocessing...")
                processed_path = preprocess_image(filepath)
                result = p2t.recognize(processed_path)
                print(f"Preprocessed result: {result}")
                latex_code = _latex_from_result(result)

            print(f"Final extracted LaTeX: {latex_code}")

        return jsonify({
            'success': True,
            'latex': latex_code,
            'image_path': filepath
        })

    except Exception as e:
        print(f"Error processing camera image: {e}")
        return jsonify({'error': str(e)}), 500