Spaces:
Running
Running
PDF, Word ve Excel uzantılı dosyaları seçerek yükleyeceğim; yüklenen dosyaların içeriğini (metin, tablo, başlık, girinti vb.) eksiksiz olarak çıkartacak, yüklenen belgelerin her zaman metin olmayabileceğini, resim de olabileceğini unutmadan içeriği JSON ve Markdown formatına çevirip indirecek bir uygulama istiyorum.
Browse files- README.md +8 -5
- components/footer.js +46 -0
- components/navbar.js +62 -0
- index.html +73 -19
- script.js +311 -0
- style.css +24 -19
README.md
CHANGED
|
@@ -1,10 +1,13 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: DocExtractor Pro 📄✨
|
| 3 |
+
colorFrom: red
|
| 4 |
+
colorTo: purple
|
| 5 |
+
emoji: 🐳
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
+
tags:
|
| 9 |
+
- deepsite-v3
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Welcome to your new DeepSite project!
|
| 13 |
+
This project was created with [DeepSite](https://huggingface.co/deepsite).
|
components/footer.js
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * <custom-footer> web component.
 *
 * Renders the site footer (three placeholder links and a copyright line with
 * the current year) inside an open shadow root so its styles stay isolated
 * from the page.
 */
class CustomFooter extends HTMLElement {
    /**
     * Builds the shadow DOM the first time the element is connected.
     *
     * connectedCallback fires on every (re)insertion into a document, and
     * attachShadow throws if the element already hosts a shadow root, so the
     * markup is only created once.
     */
    connectedCallback() {
        if (this.shadowRoot) {
            return;
        }

        this.attachShadow({ mode: 'open' });
        this.shadowRoot.innerHTML = `
            <style>
                :host {
                    display: block;
                    width: 100%;
                    background-color: #1e293b;
                    color: #f8fafc;
                }
                .footer {
                    padding: 2rem 1rem;
                    text-align: center;
                }
                .footer-links {
                    display: flex;
                    justify-content: center;
                    gap: 1.5rem;
                    margin-bottom: 1.5rem;
                }
                .footer-link {
                    color: #e2e8f0;
                    text-decoration: none;
                    transition: color 0.2s;
                }
                .footer-link:hover {
                    color: #ffffff;
                }
                .copyright {
                    font-size: 0.875rem;
                    color: #94a3b8;
                }
            </style>
            <footer class="footer">
                <div class="footer-links">
                    <a href="#" class="footer-link">Privacy Policy</a>
                    <a href="#" class="footer-link">Terms of Service</a>
                    <a href="#" class="footer-link">Contact Us</a>
                </div>
                <p class="copyright">© ${new Date().getFullYear()} DocExtractor Pro. All rights reserved.</p>
            </footer>
        `;
    }
}

// define() throws when the tag name is already registered (e.g. the script is
// included twice), so register only once.
if (!customElements.get('custom-footer')) {
    customElements.define('custom-footer', CustomFooter);
}
|
components/navbar.js
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * <custom-navbar> web component.
 *
 * Renders the top navigation bar (logo plus three placeholder links) inside
 * an open shadow root. The link list is hidden on viewports up to 768px wide.
 */
class CustomNavbar extends HTMLElement {
    /**
     * Builds the shadow DOM the first time the element is connected.
     *
     * connectedCallback fires on every (re)insertion into a document, and
     * attachShadow throws if the element already hosts a shadow root, so the
     * markup is only created once.
     */
    connectedCallback() {
        if (this.shadowRoot) {
            return;
        }

        this.attachShadow({ mode: 'open' });
        this.shadowRoot.innerHTML = `
            <style>
                :host {
                    display: block;
                    width: 100%;
                    background-color: white;
                    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
                }
                .navbar {
                    display: flex;
                    justify-content: space-between;
                    align-items: center;
                    padding: 1rem 2rem;
                }
                .logo {
                    display: flex;
                    align-items: center;
                    font-weight: 700;
                    font-size: 1.25rem;
                    color: #4f46e5;
                    text-decoration: none;
                }
                .logo-icon {
                    margin-right: 0.5rem;
                }
                .nav-links {
                    display: flex;
                    gap: 1.5rem;
                }
                .nav-link {
                    color: #4b5563;
                    text-decoration: none;
                    font-weight: 500;
                    transition: color 0.2s;
                }
                .nav-link:hover {
                    color: #4f46e5;
                }
                @media (max-width: 768px) {
                    .nav-links {
                        display: none;
                    }
                }
            </style>
            <nav class="navbar">
                <a href="/" class="logo">
                    <i data-feather="file-text" class="logo-icon"></i>
                    DocExtractor Pro
                </a>
                <div class="nav-links">
                    <a href="#" class="nav-link">Home</a>
                    <a href="#" class="nav-link">Features</a>
                    <a href="#" class="nav-link">About</a>
                </div>
            </nav>
        `;

        this.renderIcons();
    }

    /**
     * Replaces the `data-feather` placeholders inside the shadow root with
     * inline SVG.
     *
     * The page-level feather.replace() call only searches the main document,
     * so icons declared inside a shadow root would otherwise stay empty.
     * Does nothing when the Feather library is not loaded.
     */
    renderIcons() {
        const featherLib = typeof feather === 'undefined' ? null : feather;
        if (!featherLib || !featherLib.icons) {
            return;
        }

        this.shadowRoot.querySelectorAll('[data-feather]').forEach((placeholder) => {
            const icon = featherLib.icons[placeholder.dataset.feather];
            if (!icon) {
                return; // unknown icon name: keep the placeholder untouched
            }
            const holder = document.createElement('template');
            // Carry the placeholder's classes over so `.logo-icon` still applies.
            holder.innerHTML = icon.toSvg({ class: placeholder.className });
            placeholder.replaceWith(holder.content);
        });
    }
}

// define() throws when the tag name is already registered (e.g. the script is
// included twice), so register only once.
if (!customElements.get('custom-navbar')) {
    customElements.define('custom-navbar', CustomNavbar);
}
|
index.html
CHANGED
|
@@ -1,19 +1,73 @@
|
|
| 1 |
-
<!
|
| 2 |
-
<html>
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>DocExtractor Pro - Convert Files to Text Formats</title>
    <link rel="stylesheet" href="style.css">
    <script src="https://cdn.tailwindcss.com"></script>
    <!-- Feather icons are loaded once; a second copy from another CDN was redundant. -->
    <script src="https://cdn.jsdelivr.net/npm/feather-icons/dist/feather.min.js"></script>
    <!-- Extraction libraries used by script.js: PDF text, DOCX text, spreadsheets, OCR. -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.11.338/pdf.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/mammoth/1.4.0/mammoth.browser.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/tesseract.js/4.1.1/tesseract.min.js"></script>
    <!-- script.js waits for DOMContentLoaded, so loading it from <head> is safe. -->
    <script src="script.js"></script>
</head>
<body class="bg-gray-50 min-h-screen">
    <custom-navbar></custom-navbar>

    <main class="container mx-auto px-4 py-8">
        <section class="max-w-4xl mx-auto bg-white rounded-xl shadow-md overflow-hidden p-6 mb-8">
            <h1 class="text-3xl font-bold text-gray-800 mb-4">Document Extractor</h1>
            <p class="text-gray-600 mb-6">Upload PDF, Word, or Excel files to extract text with formatting, tables, and images (OCR). Convert to JSON or Markdown formats.</p>

            <!-- File picker: the real <input> is hidden and opened through the button. -->
            <div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center mb-6">
                <!-- Legacy .doc is not offered: the Word extractor (mammoth) reads .docx only. -->
                <input type="file" id="fileInput" class="hidden" accept=".pdf,.docx,.xlsx,.xls,.png,.jpg,.jpeg" multiple>
                <button id="uploadBtn" class="bg-indigo-600 hover:bg-indigo-700 text-white font-medium py-3 px-6 rounded-lg transition duration-200 flex items-center mx-auto">
                    <i data-feather="upload" class="mr-2"></i> Choose Files
                </button>
                <p class="text-gray-500 mt-3">Supported formats: PDF, DOCX, XLSX, XLS, JPG, PNG</p>
            </div>

            <!-- Filled by script.js once files are selected. -->
            <div id="filePreviewContainer" class="hidden">
                <h2 class="text-xl font-semibold text-gray-800 mb-4">Selected Files</h2>
                <div id="filePreviewList" class="space-y-3 mb-6"></div>
            </div>

            <div class="flex flex-col sm:flex-row gap-4 mb-6">
                <div class="w-full">
                    <label class="block text-gray-700 font-medium mb-2" for="outputFormat">Output Format</label>
                    <select id="outputFormat" class="w-full p-3 border border-gray-300 rounded-lg focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500">
                        <option value="json">JSON</option>
                        <option value="markdown">Markdown</option>
                        <option value="text">Plain Text</option>
                    </select>
                </div>
                <button id="processBtn" class="bg-indigo-600 hover:bg-indigo-700 text-white font-medium py-3 px-6 rounded-lg transition duration-200 flex items-center justify-center mt-6 sm:mt-auto">
                    <i data-feather="cpu" class="mr-2"></i> Process Files
                </button>
            </div>

            <!-- Revealed by script.js after processing. -->
            <div id="resultsSection" class="hidden">
                <div class="flex justify-between items-center mb-4">
                    <h2 class="text-xl font-semibold text-gray-800">Extracted Content</h2>
                    <button id="downloadAllBtn" class="bg-green-600 hover:bg-green-700 text-white font-medium py-2 px-4 rounded-lg transition duration-200 flex items-center">
                        <i data-feather="download" class="mr-2"></i> Download All
                    </button>
                </div>
                <div id="resultsContainer" class="space-y-6"></div>
            </div>
        </section>
    </main>

    <custom-footer></custom-footer>

    <script src="components/navbar.js"></script>
    <script src="components/footer.js"></script>
    <script>
        feather.replace();
    </script>
    <script src="https://huggingface.co/deepsite/deepsite-badge.js"></script>
</body>
</html>
|
script.js
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
document.addEventListener('DOMContentLoaded', function() {
|
| 2 |
+
// Look up the page elements this script drives; each id matches index.html.
const lookup = (id) => document.getElementById(id);

const uploadBtn = lookup('uploadBtn');
const fileInput = lookup('fileInput');
const filePreviewList = lookup('filePreviewList');
const filePreviewContainer = lookup('filePreviewContainer');
const processBtn = lookup('processBtn');
const outputFormat = lookup('outputFormat');
const resultsContainer = lookup('resultsContainer');
const resultsSection = lookup('resultsSection');
const downloadAllBtn = lookup('downloadAllBtn');

// Current selection and the results of the most recent processing run.
let files = [];
let processedResults = [];

// PDF.js parses documents in a worker; point it at the matching worker build.
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.11.338/pdf.worker.min.js';

// The visible button forwards clicks to the hidden file input.
uploadBtn.addEventListener('click', function() {
    fileInput.click();
});

// Rebuild the preview list whenever the selection changes.
fileInput.addEventListener('change', handleFileSelection);
|
| 22 |
+
|
| 23 |
+
/**
 * Handles a change of the file input: stores the selection and rebuilds the
 * preview list.
 *
 * @param {Event} e - change event whose target exposes the selected `files`.
 */
function handleFileSelection(e) {
    files = Array.from(e.target.files);
    filePreviewList.innerHTML = '';

    if (files.length === 0) {
        filePreviewContainer.classList.add('hidden');
        return;
    }

    files.forEach((file, index) => {
        filePreviewList.appendChild(createFilePreview(file, index));
    });

    filePreviewContainer.classList.remove('hidden');

    // feather.replace() only converts placeholders that are already in the
    // document, so run it after every card has been attached; otherwise the
    // last card's icon is never rendered.
    feather.replace();
}
|
| 39 |
+
|
| 40 |
+
/**
 * Builds the preview card for one selected file: a type icon and the file
 * name on the left, the formatted size on the right.
 *
 * @param {File} file - the selected file (name, size and type are read).
 * @param {number} index - position in the selection (currently unused).
 * @returns {HTMLDivElement} the detached card element.
 */
function createFilePreview(file, index) {
    // Creates an element and applies its utility classes in one step.
    const styled = (tag, classes) => {
        const node = document.createElement(tag);
        node.className = classes;
        return node;
    };

    // Feather placeholder, wrapped in a round badge.
    const glyph = document.createElement('i');
    glyph.dataset.feather = getFileIcon(file);
    const badge = styled('div', 'bg-gray-200 p-2 rounded-full mr-3');
    badge.appendChild(glyph);

    const nameLabel = styled('span', 'font-medium text-gray-800');
    nameLabel.textContent = file.name;

    const leftSide = styled('div', 'flex items-center');
    leftSide.appendChild(badge);
    leftSide.appendChild(nameLabel);

    const sizeLabel = styled('span', 'text-gray-500 text-sm');
    sizeLabel.textContent = formatFileSize(file.size);

    const card = styled('div', 'file-card bg-gray-50 rounded-lg p-4 flex items-center justify-between');
    card.appendChild(leftSide);
    card.appendChild(sizeLabel);

    feather.replace();
    return card;
}
|
| 71 |
+
|
| 72 |
+
/**
 * Picks the Feather icon name for a file from its MIME type.
 *
 * @param {File} file - file whose `type` string is inspected.
 * @returns {string} 'file', 'file-text' or 'image'; 'file' when nothing matches.
 */
function getFileIcon(file) {
    const mime = file.type;

    // Checked in order; the first rule with a matching fragment wins.
    const iconRules = [
        [['pdf'], 'file'],
        [['word', 'document'], 'file-text'],
        [['excel', 'spreadsheet'], 'file-text'],
        [['image'], 'image'],
    ];

    for (const [fragments, iconName] of iconRules) {
        if (fragments.some((fragment) => mime.includes(fragment))) {
            return iconName;
        }
    }
    return 'file';
}
|
| 79 |
+
|
| 80 |
+
/**
 * Formats a byte count as a short human-readable string using 1024-based
 * units, rounded to at most two decimals (e.g. 1536 -> "1.5 KB").
 *
 * @param {number} bytes - size in bytes.
 * @returns {string} formatted size; "0 Bytes" for zero, negative or
 *     non-finite input. Values beyond the largest unit stay in that unit.
 */
function formatFileSize(bytes) {
    if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes';

    const k = 1024;
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];

    // Divide step by step instead of using logarithms: the unit index can
    // then never leave the table (no "undefined" unit for huge or fractional
    // values) and exact powers of 1024 are not hit by rounding error.
    let value = bytes;
    let unitIndex = 0;
    while (value >= k && unitIndex < sizes.length - 1) {
        value /= k;
        unitIndex += 1;
    }

    return parseFloat(value.toFixed(2)) + ' ' + sizes[unitIndex];
}
|
| 87 |
+
|
| 88 |
+
// Process files
|
| 89 |
+
// Runs extraction for every selected file, one after another, and renders
// each result as soon as it is ready.
processBtn.addEventListener('click', async function() {
    if (files.length === 0) {
        alert('Please select at least one file');
        return;
    }

    // Work on a snapshot so a new selection made while processing does not
    // change the batch mid-run.
    const batch = [...files];

    resultsContainer.innerHTML = '';
    processedResults = [];
    processBtn.disabled = true;
    processBtn.innerHTML = '<i data-feather="loader" class="spinner mr-2"></i> Processing...';
    feather.replace();

    const failures = [];

    try {
        for (const file of batch) {
            // Handle errors per file: one unreadable document must not throw
            // away the results of the others.
            try {
                const result = await processFile(file);
                processedResults.push(result);
                displayResult(result);
            } catch (error) {
                console.error('Error processing files:', error);
                // Some sources reject with non-Error values that have no message.
                const reason = error && error.message ? error.message : String(error);
                failures.push(file.name + ': ' + reason);
            }
        }

        // Show the section whenever there is something to show, hide it otherwise.
        resultsSection.classList.toggle('hidden', processedResults.length === 0);

        if (failures.length > 0) {
            alert('An error occurred while processing files: ' + failures.join('\n'));
        }
    } finally {
        processBtn.disabled = false;
        processBtn.innerHTML = '<i data-feather="cpu" class="mr-2"></i> Process Files';
        feather.replace();
    }
});
|
| 118 |
+
|
| 119 |
+
/**
 * Classifies a file as 'pdf', 'excel', 'word' or 'image' from its MIME type,
 * falling back to the (case-insensitive) file extension when the browser
 * reports no usable type.
 *
 * Spreadsheets are checked before Word documents on purpose: the .xlsx MIME
 * type contains "officedocument", so a generic "document" match would send
 * Excel files to the Word extractor.
 *
 * @param {File} file - file whose `type` and `name` are inspected.
 * @returns {string|null} the detected kind, or null when unsupported.
 */
function detectFileKind(file) {
    const mime = (file.type || '').toLowerCase();
    const name = (file.name || '').toLowerCase();
    const hasExtension = (...extensions) => extensions.some((ext) => name.endsWith(ext));

    if (mime.includes('pdf') || hasExtension('.pdf')) return 'pdf';
    if (mime.includes('spreadsheet') || mime.includes('excel') || hasExtension('.xlsx', '.xls')) return 'excel';
    if (mime.includes('word') || hasExtension('.docx', '.doc')) return 'word';
    if (mime.startsWith('image/') || hasExtension('.png', '.jpg', '.jpeg')) return 'image';
    return null;
}

/**
 * Renders a list of rows (arrays of cell values) as a Markdown table, using
 * the first non-empty row as the header.
 *
 * @param {Array<Array<*>>} rows - rows as produced with `header: 1`.
 * @returns {string} the table, or a placeholder line when there are no rows.
 */
function renderMarkdownTable(rows) {
    const filled = rows.filter((row) => Array.isArray(row) && row.length > 0);
    if (filled.length === 0) return '_(empty sheet)_';

    const width = filled.reduce((max, row) => Math.max(max, row.length), 0);
    // Pipes and line breaks would break the table layout, so neutralize them.
    const toCell = (value) => (value === undefined || value === null
        ? ''
        : String(value).replace(/\|/g, '\\|').replace(/\r?\n/g, '<br>'));
    // Array.from visits every column, so sparse or short rows are padded.
    const toLine = (row) => '| ' + Array.from({ length: width }, (_, i) => toCell(row[i])).join(' | ') + ' |';
    const divider = '| ' + Array.from({ length: width }, () => '---').join(' | ') + ' |';

    const [header, ...body] = filled;
    return [toLine(header), divider, ...body.map(toLine)].join('\n');
}

/**
 * Renders extracted workbook content (sheet name -> rows) as Markdown: one
 * second-level heading and one table per sheet.
 *
 * @param {Object<string, Array<Array<*>>>} sheets - workbook content.
 * @returns {string} Markdown for all sheets.
 */
function sheetsToMarkdown(sheets) {
    return Object.keys(sheets)
        .map((sheetName) => '## ' + sheetName + '\n\n' + renderMarkdownTable(sheets[sheetName]))
        .join('\n\n');
}

/**
 * Extracts the content of one file and formats it in the output format
 * currently selected in the `outputFormat` control.
 *
 * @param {File} file - the file to process.
 * @returns {Promise<{fileName: string, content: string, format: string}>}
 *     the formatted content together with the source name and format.
 * @throws {Error} when the file type is not supported; extraction errors
 *     propagate unchanged.
 */
async function processFile(file) {
    const format = outputFormat.value;
    let content;

    switch (detectFileKind(file)) {
        case 'pdf':
            content = await extractTextFromPDF(file);
            break;
        case 'word':
            content = await extractTextFromWord(file);
            break;
        case 'excel':
            content = await extractTextFromExcel(file);
            break;
        case 'image':
            content = await extractTextFromImage(file);
            break;
        default:
            throw new Error('Unsupported file type: ' + (file.type || file.name));
    }

    // Convert content to the requested format.
    let formattedContent;
    if (format === 'json') {
        formattedContent = JSON.stringify({
            fileName: file.name,
            fileType: file.type,
            fileSize: file.size,
            content: content,
            extractedAt: new Date().toISOString()
        }, null, 2);
    } else if (format === 'markdown') {
        formattedContent = `# ${file.name}\n\n`;
        if (typeof content === 'string') {
            formattedContent += content;
        } else if (content !== null && typeof content === 'object'
            && Object.values(content).every(Array.isArray)) {
            // Spreadsheet content: render real tables instead of mangled JSON.
            formattedContent += sheetsToMarkdown(content);
        } else {
            // Unknown structure: keep it readable as a fenced JSON block.
            formattedContent += '```json\n' + JSON.stringify(content, null, 2) + '\n```';
        }
    } else {
        // Plain text
        formattedContent = typeof content === 'string'
            ? content
            : JSON.stringify(content, null, 2);
    }

    return {
        fileName: file.name,
        content: formattedContent,
        format: format
    };
}
|
| 172 |
+
|
| 173 |
+
/**
 * Extracts the text layer of a PDF with PDF.js.
 *
 * Each page's text is followed by a blank line. Text items are joined with a
 * space, or with a line break when PDF.js flags the item as ending a line
 * (`hasEOL`), so the line structure survives where the library reports it.
 *
 * NOTE(review): pages without a text layer (scanned documents) come back
 * empty here; no OCR fallback is attempted.
 *
 * @param {Blob} file - the PDF file.
 * @returns {Promise<string>} the extracted text.
 * @throws rejects with the read or PDF.js error if the file cannot be parsed.
 */
async function extractTextFromPDF(file) {
    // Blob.arrayBuffer() rejects with a proper error object; a FileReader
    // wrapper would reject with a ProgressEvent that has no message.
    const typedArray = new Uint8Array(await file.arrayBuffer());
    const pdf = await pdfjsLib.getDocument(typedArray).promise;
    let text = '';

    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber);
        const { items } = await page.getTextContent();

        let pageText = '';
        items.forEach((item, index) => {
            pageText += typeof item.str === 'string' ? item.str : '';
            if (index < items.length - 1) {
                pageText += item.hasEOL ? '\n' : ' ';
            }
        });

        text += pageText + '\n\n';
    }

    return text;
}
|
| 200 |
+
|
| 201 |
+
/**
 * Extracts the raw text of a Word document with mammoth.
 *
 * NOTE(review): raw-text extraction does not carry headings, lists or table
 * structure through; only the text itself is returned.
 *
 * @param {Blob} file - the Word file.
 * @returns {Promise<string>} the document text (`value` of mammoth's result).
 * @throws rejects with the read or mammoth error if extraction fails.
 */
async function extractTextFromWord(file) {
    // Blob.arrayBuffer() rejects with a proper error object; a FileReader
    // wrapper would reject with a ProgressEvent that has no message.
    const arrayBuffer = await file.arrayBuffer();
    const result = await mammoth.extractRawText({ arrayBuffer: arrayBuffer });
    return result.value;
}
|
| 217 |
+
|
| 218 |
+
/**
 * Reads a spreadsheet with SheetJS and returns every sheet as rows of cells.
 *
 * @param {Blob} file - the spreadsheet file.
 * @returns {Promise<Object<string, Array<Array<*>>>>} object keyed by sheet
 *     name; each value is the sheet converted with `header: 1` (an array of
 *     row arrays).
 * @throws rejects with the read or SheetJS error if the workbook cannot be parsed.
 */
async function extractTextFromExcel(file) {
    // Blob.arrayBuffer() rejects with a proper error object; a FileReader
    // wrapper would reject with a ProgressEvent that has no message.
    const data = new Uint8Array(await file.arrayBuffer());
    const workbook = XLSX.read(data, { type: 'array' });
    const result = {};

    for (const sheetName of workbook.SheetNames) {
        result[sheetName] = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], { header: 1 });
    }

    return result;
}
|
| 243 |
+
|
| 244 |
+
/**
 * Runs OCR on an image with Tesseract.js using the English model and returns
 * the recognized text. Progress messages from the recognizer are written to
 * the console.
 *
 * @param {File} file - the image to recognize.
 * @returns {Promise<string>} the recognized text.
 */
async function extractTextFromImage(file) {
    const ocrOptions = { logger: (progress) => console.log(progress) };
    const recognition = await Tesseract.recognize(file, 'eng', ocrOptions);
    return recognition.data.text;
}
|
| 255 |
+
|
| 256 |
+
/**
 * Appends a result card to the results list: a header with the file name and
 * a per-file download button, followed by the extracted content in a <pre>.
 *
 * @param {{fileName: string, content: string, format: string}} result -
 *     the processed result to show.
 */
function displayResult(result) {
    // Header: file name on the left, download button on the right.
    const heading = document.createElement('h3');
    heading.className = 'font-semibold text-lg text-gray-800 truncate';
    heading.textContent = result.fileName;

    const saveButton = document.createElement('button');
    saveButton.className = 'bg-indigo-600 hover:bg-indigo-700 text-white font-medium py-1 px-3 rounded transition duration-200 flex items-center text-sm';
    saveButton.innerHTML = '<i data-feather="download" class="mr-1"></i> Download';
    saveButton.addEventListener('click', function() {
        downloadResult(result);
    });

    const headerRow = document.createElement('div');
    headerRow.className = 'flex justify-between items-center mb-3';
    headerRow.appendChild(heading);
    headerRow.appendChild(saveButton);

    // Every format is shown the same way: verbatim text in a <pre>
    // (textContent, so the extracted text is never parsed as HTML).
    const output = document.createElement('pre');
    output.textContent = result.content;
    const body = document.createElement('div');
    body.appendChild(output);

    const card = document.createElement('div');
    card.className = 'bg-gray-50 rounded-lg p-4 shadow-sm';
    card.appendChild(headerRow);
    card.appendChild(body);

    resultsContainer.appendChild(card);
    feather.replace();
}
|
| 293 |
+
|
| 294 |
+
/**
 * Downloads one processed result as a file named after the source document.
 *
 * Only the last extension of the source name is replaced (so
 * "report.v2.pdf" becomes "report.v2.md"), and each format gets its
 * conventional extension and MIME type.
 *
 * @param {{fileName: string, content: string, format: string}} result -
 *     the processed result to save.
 */
function downloadResult(result) {
    // Format value -> conventional extension and MIME type.
    const fileKinds = {
        json: { extension: 'json', mime: 'application/json' },
        markdown: { extension: 'md', mime: 'text/markdown' },
        text: { extension: 'txt', mime: 'text/plain' },
    };
    const known = Object.prototype.hasOwnProperty.call(fileKinds, result.format);
    const kind = known
        ? fileKinds[result.format]
        : { extension: result.format, mime: 'text/plain' };

    // Strip only the final extension; keep names without one (or dotfiles) whole.
    const lastDot = result.fileName.lastIndexOf('.');
    const baseName = lastDot > 0 ? result.fileName.slice(0, lastDot) : result.fileName;

    const blob = new Blob([result.content], { type: kind.mime + ';charset=utf-8' });
    const url = URL.createObjectURL(blob);
    try {
        const a = document.createElement('a');
        a.href = url;
        a.download = `${baseName}.${kind.extension}`;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
    } finally {
        // Release the object URL even if triggering the download failed.
        URL.revokeObjectURL(url);
    }
}
|
| 305 |
+
|
| 306 |
+
// "Download All": save every result of the last processing run, in order.
downloadAllBtn.addEventListener('click', function() {
    for (const entry of processedResults) {
        downloadResult(entry);
    }
});
|
| 311 |
+
});
|
style.css
CHANGED
|
@@ -1,28 +1,33 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
}
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
margin-top: 0;
|
| 9 |
}
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
font-size: 15px;
|
| 14 |
-
margin-bottom: 10px;
|
| 15 |
-
margin-top: 5px;
|
| 16 |
}
|
| 17 |
|
| 18 |
-
.card {
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
padding: 16px;
|
| 22 |
-
border: 1px solid lightgray;
|
| 23 |
-
border-radius: 16px;
|
| 24 |
}
|
| 25 |
|
| 26 |
-
.
|
| 27 |
-
|
|
|
|
| 28 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* One full clockwise turn; drives the loading spinner. */
@keyframes spin {
    from { transform: rotate(0deg); }
    to { transform: rotate(360deg); }
}

/* Applied to the loader icon while files are being processed. */
.spinner {
    animation: spin 1s linear infinite;
}

/* Selected-file cards lift slightly on hover. */
.file-card {
    transition: all 0.2s ease-in-out;
}

.file-card:hover {
    box-shadow: 0 6px 12px rgba(0, 0, 0, 0.1);
    transform: translateY(-2px);
}

/* Thumbnail sizing for image previews. */
.preview-image {
    max-height: 150px;
    object-fit: contain;
}

/* Extracted content: wrap long lines and scroll once the box is full. */
#resultsContainer pre {
    background-color: #f8f9fa;
    border-radius: 8px;
    padding: 16px;
    max-height: 500px;
    overflow: auto;
    white-space: pre-wrap;
    word-wrap: break-word;
}
|