Spaces:
Paused
Paused
File size: 7,942 Bytes
3e6b063 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# controller/pix2text_bp.py
import os
import cv2
from flask import Blueprint, render_template, request, jsonify
from pix2text import Pix2Text
from utils.math_solver import solve_equation
from controller.models.camera_to_latex import camera_to_latex
# Load the Pix2Text model a single time at import so every request reuses it.
print("🔹 Loading Pix2Text model (mfd)...")
try:
    p2t = Pix2Text(analyzer_config={'model_name': 'mfd'})
    print("✅ Pix2Text model loaded successfully.")
except Exception as init_error:
    # Keep the module importable when loading fails; the route handlers
    # check `p2t` for truthiness before calling it.
    print(f"❌ Pix2Text failed to initialize: {init_error}")
    p2t = None
# Flask blueprint on which the /math and /camera routes in this module are registered
pix2text_bp = Blueprint('pix2text_bp', __name__)
# Uploaded images are saved under this (relative) directory; it is created at
# import time if it does not already exist.
UPLOAD_FOLDER = 'static/uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Optional preprocessing
# Directory that receives the thresholded copies written by preprocess_image().
# NOTE(review): the original code referenced PROCESSED_FOLDER without defining
# it; 'static/processed' mirrors the upload folder layout - confirm the
# intended location.
PROCESSED_FOLDER = 'static/processed'


def preprocess_image(image_path):
    """Write a binarized copy of an image and return the copy's path.

    The image is converted to grayscale, lightly blurred, and adaptively
    thresholded. Preprocessing is best-effort: on any failure the error is
    printed and the original path is returned so callers can continue with
    the unprocessed image.

    Args:
        image_path: Path of the image file to preprocess.

    Returns:
        Path of the processed image inside ``PROCESSED_FOLDER``, or
        ``image_path`` unchanged when preprocessing fails.
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports an unreadable file by returning None.
            raise ValueError("Could not read image")

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Mild 3x3 Gaussian blur to reduce noise before thresholding.
        blurred = cv2.GaussianBlur(gray, (3, 3), 0)
        thresh = cv2.adaptiveThreshold(
            blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, 2
        )

        # Insert the suffix before the extension only; replacing every '.'
        # would mangle names such as "scan.v2.png".
        stem, ext = os.path.splitext(os.path.basename(image_path))
        os.makedirs(PROCESSED_FOLDER, exist_ok=True)
        processed_path = os.path.join(PROCESSED_FOLDER, f"{stem}_processed{ext}")

        # cv2.imwrite signals a failed write through its boolean return value.
        if not cv2.imwrite(processed_path, thresh):
            raise ValueError("Could not write processed image")
        return processed_path
    except Exception as e:
        # Deliberate best-effort boundary: never let preprocessing break OCR.
        print(f"Preprocessing error: {e}")
        return image_path  # Return original if preprocessing fails
# -----------------------------
# Math Routes
# -----------------------------
@pix2text_bp.route("/math")
def math_page():
    """Serve the page rendered from the math.html template."""
    page = render_template("math.html")
    return page
@pix2text_bp.route("/math/process", methods=["POST"])
def process_math_image():
    """Save an uploaded image, run Pix2Text on it, and return the text as JSON.

    Expects a multipart upload with the file under the ``image`` field.

    Returns:
        A JSON response with ``success``, ``latex`` and ``image_path`` keys
        on success; ``{'error': ...}`` with status 400 for a missing or
        invalid upload, or with status 500 if processing raises.
    """
    try:
        if 'image' not in request.files:
            return jsonify({'error': 'No image file provided'}), 400
        file = request.files['image']
        if not file.filename:
            return jsonify({'error': 'No file selected'}), 400

        # The filename is client-controlled: keep only its final component so
        # values such as "../../app.py" cannot escape UPLOAD_FOLDER.
        filename = os.path.basename(file.filename.replace('\\', '/'))
        if filename in ('', '.', '..'):
            return jsonify({'error': 'Invalid filename'}), 400
        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)

        # Preprocess (optional): falls back to the original path on failure.
        processed_path = preprocess_image(filepath)

        # Run Pix2Text
        if p2t:
            result = p2t.recognize(processed_path)
            if isinstance(result, dict):
                latex = result.get('text', '')
            elif isinstance(result, list) and result and isinstance(result[0], dict):
                # Only the first entry's text is used.
                latex = result[0].get('text', '')
            else:
                latex = str(result)
        else:
            # The model failed to load at import; report that in the payload.
            latex = "\\text{Pix2Text not initialized}"

        return jsonify({
            'success': True,
            'latex': latex,
            'image_path': filepath,
        })
    except Exception as e:
        print(f"❌ Error in /math/process: {e}")
        return jsonify({'error': str(e)}), 500
@pix2text_bp.route("/math/solve", methods=["POST"])
def solve_math_equation():
    """Solve the LaTeX equation sent in a JSON body and return the solution.

    Expects a JSON object containing a ``latex`` key.

    Returns:
        ``{'success': True, 'solution': ...}`` on success; ``{'error': ...}``
        with status 400 when the body is not a JSON object with a ``latex``
        key, or with status 500 if solving raises.
    """
    try:
        # silent=True yields None for a non-JSON or malformed body, so the
        # request is rejected with 400 below instead of the parsing error
        # being caught by the blanket handler and reported as 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'latex' not in data:
            return jsonify({'error': 'No equation provided'}), 400
        solution = solve_equation(data['latex'])
        return jsonify({'success': True, 'solution': solution})
    except Exception as e:
        print(f"❌ Error in /math/solve: {e}")
        return jsonify({'error': str(e)}), 500
# -----------------------------
# Camera Routes
# -----------------------------
# @pix2text_bp.route("/camera")
# def camera_page():
# return render_template("camera.html")
@pix2text_bp.route("/camera")
def camera_page():
    """Serve the page rendered from the camera.html template."""
    page = render_template("camera.html")
    return page
@pix2text_bp.route("/camera/solve", methods=["POST"])
def solve_camera_equation():
    """Solve a LaTeX equation from camera input.

    Expects a JSON object with a non-empty ``latex`` value.

    Returns:
        ``{'success': True, 'solution': ...}`` on success; ``{'error': ...}``
        with status 400 when the body or the equation is missing, or with
        status 500 if solving raises.
    """
    try:
        # silent=True yields None for a non-JSON or malformed body, so such
        # requests get a 400 below rather than a 500 from the blanket handler.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'error': 'No data provided'}), 400

        # A JSON array or scalar has no 'latex' field; treat it as missing.
        latex_equation = data.get('latex', '') if isinstance(data, dict) else ''
        if not latex_equation:
            return jsonify({'error': 'No equation provided'}), 400

        solution = solve_equation(latex_equation)
        return jsonify({
            'success': True,
            'solution': solution,
        })
    except Exception as e:
        print(f"❌ Error in /camera/solve: {e}")
        return jsonify({'error': str(e)}), 500
def _extract_latex(result):
    """Return the recognized text from a Pix2Text result of any handled shape."""
    if isinstance(result, dict):
        return result.get('text', '')
    if isinstance(result, list) and result and isinstance(result[0], dict):
        # Only the first entry's text is used.
        return result[0].get('text', '')
    return str(result)


@pix2text_bp.route("/camera/process", methods=["POST"])
def process_camera_image():
    """Process camera captured image using Pix2Text.

    Expects a multipart upload with the file under the ``image`` field. The
    original image is recognized first; if that yields fewer than two
    non-whitespace-trimmed characters, recognition is retried on a
    preprocessed copy.

    Returns:
        A JSON response with ``success``, ``latex`` and ``image_path`` keys
        on success; ``{'error': ...}`` with status 400 for a missing or
        invalid upload, or with status 500 if processing raises.
    """
    try:
        if 'image' not in request.files:
            return jsonify({'error': 'No image file provided'}), 400
        file = request.files['image']
        # Covers both an empty and an absent filename, so neither falls
        # through to a generic 500 response.
        if not file.filename:
            return jsonify({'error': 'No image file selected'}), 400

        # The filename is client-controlled: keep only its final component so
        # values such as "../../app.py" cannot escape UPLOAD_FOLDER.
        filename = os.path.basename(file.filename.replace('\\', '/'))
        if filename in ('', '.', '..'):
            return jsonify({'error': 'Invalid filename'}), 400
        filepath = os.path.join(UPLOAD_FOLDER, filename)
        file.save(filepath)

        if p2t:
            # For camera captures, try the original image first, as
            # preprocessing might distort mathematical symbols.
            processed_path = filepath
            print(f"Processing image: {processed_path}")
            result = p2t.recognize(processed_path)
            print(f"Raw result: {result}")
            latex_code = _extract_latex(result)

            # If we get no result or a very short one, retry with preprocessing.
            if len(latex_code.strip()) < 2:
                print("Result too short, trying with preprocessing...")
                processed_path = preprocess_image(filepath)
                result = p2t.recognize(processed_path)
                print(f"Preprocessed result: {result}")
                latex_code = _extract_latex(result)

            print(f"Final extracted LaTeX: {latex_code}")
        else:
            # The model failed to load at import; report that in the payload.
            latex_code = "\\text{Pix2Text not available}"

        return jsonify({
            'success': True,
            'latex': latex_code,
            'image_path': filepath,
        })
    except Exception as e:
        print(f"Error processing camera image: {e}")
        return jsonify({'error': str(e)}), 500
|