Update app_main.py
Browse files- app_main.py +29 -21
app_main.py
CHANGED
|
@@ -9,6 +9,8 @@ from dotenv import load_dotenv
|
|
| 9 |
import pytesseract
|
| 10 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
| 11 |
from langchain_community.document_loaders.image_captions import ImageCaptionLoader
|
|
|
|
|
|
|
| 12 |
|
| 13 |
app = Flask(__name__)
|
| 14 |
|
|
@@ -126,27 +128,33 @@ def index():
|
|
| 126 |
return render_template('app_index.html')
|
| 127 |
|
| 128 |
# API endpoint
|
| 129 |
-
@app.route('/
|
| 130 |
-
def
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
if __name__ == '__main__':
|
| 152 |
app.run(host='0.0.0.0', port=7860, debug=True)
|
|
|
|
| 9 |
import pytesseract
|
| 10 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
| 11 |
from langchain_community.document_loaders.image_captions import ImageCaptionLoader
|
| 12 |
+
from werkzeug.utils import secure_filename
|
| 13 |
+
import tempfile
|
| 14 |
|
| 15 |
app = Flask(__name__)
|
| 16 |
|
|
|
|
| 128 |
return render_template('app_index.html')
|
| 129 |
|
| 130 |
# API endpoint
|
| 131 |
+
@app.route('/process_pdf', methods=['POST'])
def process_pdf():
    """Accept an uploaded PDF and run image extraction on it.

    Expects a multipart/form-data POST carrying the PDF under the
    ``pdf_file`` key. The upload is written into a freshly created
    temporary directory and its path is passed to
    ``extract_images_from_pdf`` together with ``json_path=None``.

    Returns:
        A JSON response:
        * 200 with ``message``, ``output_json`` and ``sprites`` (the two
          values returned by ``extract_images_from_pdf``) on success;
        * 400 when the ``pdf_file`` part is missing or has no filename;
        * 500 with the exception text when anything else raises.
    """
    import shutil  # local import: only needed for failure cleanup

    temp_dir = None
    try:
        if 'pdf_file' not in request.files:
            return jsonify({"error": "Missing PDF file in form-data with key 'pdf_file'"}), 400

        pdf_file = request.files['pdf_file']
        # ``not`` also rejects a missing (None) filename, which would
        # otherwise crash inside secure_filename().
        if not pdf_file.filename:
            return jsonify({"error": "Empty filename"}), 400

        # secure_filename() may strip a name down to '' (e.g. a name made
        # only of dots or non-ASCII characters). Joining '' onto the temp
        # dir would yield the directory itself and make save() fail, so
        # fall back to a fixed name; the temp dir is unique per request.
        filename = secure_filename(pdf_file.filename) or 'upload.pdf'

        # Save the uploaded PDF temporarily
        temp_dir = tempfile.mkdtemp()
        saved_pdf_path = os.path.join(temp_dir, filename)
        pdf_file.save(saved_pdf_path)

        # Extract & process
        json_path = None
        output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)

        # NOTE(review): temp_dir is intentionally kept on success because
        # output_path may point inside it — confirm against
        # extract_images_from_pdf and add cleanup if it does not.
        return jsonify({
            "message": "✅ PDF processed successfully",
            "output_json": output_path,
            "sprites": result
        })
    except Exception as e:
        # Nothing is handed back to the client on failure, so the temp
        # directory created for this request can be removed safely.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return jsonify({"error": f"❌ Failed to process PDF: {str(e)}"}), 500
|
| 158 |
|
| 159 |
if __name__ == '__main__':
    # The server listens on all interfaces, so the Werkzeug interactive
    # debugger must not be on by default: it allows arbitrary code
    # execution for anyone who can reach the port. Opt in explicitly with
    # FLASK_DEBUG=1 (or true/yes/on) for local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)
|