Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -59,8 +59,8 @@ if 'model_loaded' not in st.session_state:
|
|
| 59 |
st.session_state['model_loaded'] = False
|
| 60 |
if 'processing' not in st.session_state:
|
| 61 |
st.session_state['processing'] = {}
|
| 62 |
-
if '
|
| 63 |
-
st.session_state['
|
| 64 |
if 'downloaded_pdfs' not in st.session_state:
|
| 65 |
st.session_state['downloaded_pdfs'] = {}
|
| 66 |
|
|
@@ -329,7 +329,7 @@ def get_model_files(model_type="causal_lm"):
|
|
| 329 |
path = "models/*" if model_type == "causal_lm" else "diffusion_models/*"
|
| 330 |
return [d for d in glob.glob(path) if os.path.isdir(d)]
|
| 331 |
|
| 332 |
-
def get_gallery_files(file_types=["png"]):
|
| 333 |
return sorted([f for ext in file_types for f in glob.glob(f"*.{ext}")])
|
| 334 |
|
| 335 |
def get_pdf_files():
|
|
@@ -367,11 +367,11 @@ async def process_pdf_snapshot(pdf_path, mode="single"):
|
|
| 367 |
output_file = generate_filename(f"twopage_{i}", "png")
|
| 368 |
pix.save(output_file)
|
| 369 |
output_files.append(output_file)
|
| 370 |
-
elif mode == "
|
| 371 |
for i in range(len(doc)):
|
| 372 |
page = doc[i]
|
| 373 |
-
pix = page.get_pixmap(matrix=fitz.Matrix(0
|
| 374 |
-
output_file = generate_filename(f"
|
| 375 |
pix.save(output_file)
|
| 376 |
output_files.append(output_file)
|
| 377 |
doc.close()
|
|
@@ -506,19 +506,56 @@ def calculate_cargo_travel_time(origin_coords: Tuple[float, float], destination_
|
|
| 506 |
st.title("AI Vision & SFT Titans 🚀")
|
| 507 |
|
| 508 |
st.sidebar.header("Captured Files 📜")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 2)
|
| 510 |
def update_gallery():
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
st.sidebar.subheader("Images 📸")
|
| 515 |
cols = st.sidebar.columns(2)
|
| 516 |
-
for idx, file in enumerate(
|
| 517 |
with cols[idx % 2]:
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
update_gallery()
|
| 523 |
|
| 524 |
st.sidebar.subheader("Model Management 🗂️")
|
|
@@ -625,44 +662,12 @@ with tab2:
|
|
| 625 |
status_text.text("Robo-Download complete! 🚀")
|
| 626 |
update_gallery()
|
| 627 |
|
| 628 |
-
st.
|
| 629 |
-
downloaded_pdfs = list(st.session_state['downloaded_pdfs'].values())
|
| 630 |
-
if downloaded_pdfs:
|
| 631 |
-
cols_per_row = 3
|
| 632 |
-
for i in range(0, len(downloaded_pdfs), cols_per_row):
|
| 633 |
-
cols = st.columns(cols_per_row)
|
| 634 |
-
for j, pdf_path in enumerate(downloaded_pdfs[i:i + cols_per_row]):
|
| 635 |
-
with cols[j]:
|
| 636 |
-
doc = fitz.open(pdf_path)
|
| 637 |
-
page = doc[0]
|
| 638 |
-
pix = page.get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
|
| 639 |
-
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 640 |
-
st.image(img, caption=os.path.basename(pdf_path), use_container_width=True)
|
| 641 |
-
checkbox_key = f"pdf_{pdf_path}"
|
| 642 |
-
st.session_state['pdf_checkboxes'][checkbox_key] = st.checkbox(
|
| 643 |
-
"Use for SFT/Input",
|
| 644 |
-
value=st.session_state['pdf_checkboxes'].get(checkbox_key, False),
|
| 645 |
-
key=checkbox_key
|
| 646 |
-
)
|
| 647 |
-
st.markdown(get_download_link(pdf_path, "application/pdf", "Snag It! 📥"), unsafe_allow_html=True)
|
| 648 |
-
if st.button("Zap It! 🗑️", key=f"delete_{pdf_path}"):
|
| 649 |
-
os.remove(pdf_path)
|
| 650 |
-
url_key = next((k for k, v in st.session_state['downloaded_pdfs'].items() if v == pdf_path), None)
|
| 651 |
-
if url_key:
|
| 652 |
-
del st.session_state['downloaded_pdfs'][url_key]
|
| 653 |
-
del st.session_state['pdf_checkboxes'][checkbox_key]
|
| 654 |
-
st.success(f"PDF {os.path.basename(pdf_path)} vaporized! 💨")
|
| 655 |
-
st.rerun()
|
| 656 |
-
doc.close()
|
| 657 |
-
else:
|
| 658 |
-
st.info("No PDFs captured yet. Feed the robo-downloader some URLs! 🤖")
|
| 659 |
-
|
| 660 |
-
mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (Thumbnails)"], key="download_mode")
|
| 661 |
if st.button("Snapshot Selected 📸"):
|
| 662 |
-
selected_pdfs = [path for
|
| 663 |
if selected_pdfs:
|
| 664 |
for pdf_path in selected_pdfs:
|
| 665 |
-
mode_key = {"Single Page (High-Res)": "single", "Two Pages (High-Res)": "twopage", "All Pages (
|
| 666 |
snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
|
| 667 |
for snapshot in snapshots:
|
| 668 |
st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
|
|
@@ -727,12 +732,9 @@ with tab4:
|
|
| 727 |
st.markdown(get_download_link(zip_path, "application/zip", "Download Fine-Tuned Titan"), unsafe_allow_html=True)
|
| 728 |
st.rerun()
|
| 729 |
elif isinstance(st.session_state['builder'], DiffusionBuilder):
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
demo_data = [{"image": img, "text": f"Superhero {os.path.basename(img).split('.')[0]}"} for img in captured_files]
|
| 734 |
-
for pdf_path in selected_pdfs:
|
| 735 |
-
demo_data.append({"image": pdf_path, "text": f"PDF {os.path.basename(pdf_path)}"})
|
| 736 |
edited_data = st.data_editor(pd.DataFrame(demo_data), num_rows="dynamic")
|
| 737 |
if st.button("Fine-Tune with Dataset 🔄"):
|
| 738 |
images = [Image.open(row["image"]) if row["image"].endswith('.png') else Image.frombytes("RGB", fitz.open(row["image"])[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0)).size, fitz.open(row["image"])[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0)).samples) for _, row in edited_data.iterrows()]
|
|
@@ -785,7 +787,6 @@ with tab5:
|
|
| 785 |
status_container.empty()
|
| 786 |
elif isinstance(st.session_state['builder'], DiffusionBuilder):
|
| 787 |
test_prompt = st.text_area("Enter Test Prompt", "Neon Batman")
|
| 788 |
-
selected_pdfs = [path for key, path in st.session_state['downloaded_pdfs'].items() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
|
| 789 |
if st.button("Run Test ▶️"):
|
| 790 |
image = st.session_state['builder'].generate(test_prompt)
|
| 791 |
output_file = generate_filename("diffusion_test", "png")
|
|
@@ -828,9 +829,7 @@ with tab6:
|
|
| 828 |
|
| 829 |
with tab7:
|
| 830 |
st.header("Test OCR 🔍")
|
| 831 |
-
|
| 832 |
-
selected_pdfs = [path for key, path in st.session_state['downloaded_pdfs'].items() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
|
| 833 |
-
all_files = captured_files + selected_pdfs
|
| 834 |
if all_files:
|
| 835 |
selected_file = st.selectbox("Select Image or PDF", all_files, key="ocr_select")
|
| 836 |
if selected_file:
|
|
@@ -853,13 +852,11 @@ with tab7:
|
|
| 853 |
st.success(f"OCR output saved to {output_file}")
|
| 854 |
st.session_state['processing']['ocr'] = False
|
| 855 |
else:
|
| 856 |
-
st.warning("No images or PDFs
|
| 857 |
|
| 858 |
with tab8:
|
| 859 |
st.header("Test Image Gen 🎨")
|
| 860 |
-
|
| 861 |
-
selected_pdfs = [path for key, path in st.session_state['downloaded_pdfs'].items() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
|
| 862 |
-
all_files = captured_files + selected_pdfs
|
| 863 |
if all_files:
|
| 864 |
selected_file = st.selectbox("Select Image or PDF", all_files, key="gen_select")
|
| 865 |
if selected_file:
|
|
@@ -883,14 +880,12 @@ with tab8:
|
|
| 883 |
st.success(f"Image saved to {output_file}")
|
| 884 |
st.session_state['processing']['gen'] = False
|
| 885 |
else:
|
| 886 |
-
st.warning("No images or PDFs
|
| 887 |
|
| 888 |
with tab9:
|
| 889 |
st.header("Custom Diffusion 🎨🤓")
|
| 890 |
st.write("Unleash your inner artist with our tiny diffusion models!")
|
| 891 |
-
|
| 892 |
-
selected_pdfs = [path for key, path in st.session_state['downloaded_pdfs'].items() if st.session_state['pdf_checkboxes'].get(f"pdf_{path}", False)]
|
| 893 |
-
all_files = captured_files + selected_pdfs
|
| 894 |
if all_files:
|
| 895 |
st.subheader("Select Images or PDFs to Train")
|
| 896 |
selected_files = st.multiselect("Pick Images or PDFs", all_files, key="diffusion_select")
|
|
@@ -929,6 +924,6 @@ with tab9:
|
|
| 929 |
st.success(f"Image saved to {output_file}")
|
| 930 |
st.session_state['processing']['diffusion'] = False
|
| 931 |
else:
|
| 932 |
-
st.warning("No images or PDFs
|
| 933 |
|
| 934 |
update_gallery()
|
|
|
|
| 59 |
st.session_state['model_loaded'] = False
|
| 60 |
if 'processing' not in st.session_state:
|
| 61 |
st.session_state['processing'] = {}
|
| 62 |
+
if 'asset_checkboxes' not in st.session_state:
|
| 63 |
+
st.session_state['asset_checkboxes'] = {}
|
| 64 |
if 'downloaded_pdfs' not in st.session_state:
|
| 65 |
st.session_state['downloaded_pdfs'] = {}
|
| 66 |
|
|
|
|
| 329 |
path = "models/*" if model_type == "causal_lm" else "diffusion_models/*"
|
| 330 |
return [d for d in glob.glob(path) if os.path.isdir(d)]
|
| 331 |
|
| 332 |
+
def get_gallery_files(file_types=["png", "pdf"]):
|
| 333 |
return sorted([f for ext in file_types for f in glob.glob(f"*.{ext}")])
|
| 334 |
|
| 335 |
def get_pdf_files():
|
|
|
|
| 367 |
output_file = generate_filename(f"twopage_{i}", "png")
|
| 368 |
pix.save(output_file)
|
| 369 |
output_files.append(output_file)
|
| 370 |
+
elif mode == "allpages":
|
| 371 |
for i in range(len(doc)):
|
| 372 |
page = doc[i]
|
| 373 |
+
pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
| 374 |
+
output_file = generate_filename(f"page_{i}", "png")
|
| 375 |
pix.save(output_file)
|
| 376 |
output_files.append(output_file)
|
| 377 |
doc.close()
|
|
|
|
| 506 |
st.title("AI Vision & SFT Titans 🚀")
|
| 507 |
|
| 508 |
st.sidebar.header("Captured Files 📜")
|
| 509 |
+
cols = st.sidebar.columns(2)
|
| 510 |
+
with cols[0]:
|
| 511 |
+
if st.button("Zip All 🤐"):
|
| 512 |
+
zip_path = f"all_assets_{int(time.time())}.zip"
|
| 513 |
+
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
|
| 514 |
+
for file in get_gallery_files():
|
| 515 |
+
zipf.write(file, os.path.basename(file))
|
| 516 |
+
st.sidebar.markdown(get_download_link(zip_path, "application/zip", "Download All Assets"), unsafe_allow_html=True)
|
| 517 |
+
with cols[1]:
|
| 518 |
+
if st.button("Zap All! 🗑️"):
|
| 519 |
+
for file in get_gallery_files():
|
| 520 |
+
os.remove(file)
|
| 521 |
+
st.session_state['asset_checkboxes'].clear()
|
| 522 |
+
st.session_state['downloaded_pdfs'].clear()
|
| 523 |
+
st.sidebar.success("All assets vaporized! 💨")
|
| 524 |
+
st.rerun()
|
| 525 |
+
|
| 526 |
gallery_size = st.sidebar.slider("Gallery Size", 1, 10, 2)
|
| 527 |
def update_gallery():
|
| 528 |
+
all_files = get_gallery_files()
|
| 529 |
+
if all_files:
|
| 530 |
+
st.sidebar.subheader("Asset Gallery 📸📖")
|
|
|
|
| 531 |
cols = st.sidebar.columns(2)
|
| 532 |
+
for idx, file in enumerate(all_files[:gallery_size * 2]):
|
| 533 |
with cols[idx % 2]:
|
| 534 |
+
if file.endswith('.png'):
|
| 535 |
+
st.image(Image.open(file), caption=os.path.basename(file), use_container_width=True)
|
| 536 |
+
else:
|
| 537 |
+
doc = fitz.open(file)
|
| 538 |
+
pix = doc[0].get_pixmap(matrix=fitz.Matrix(0.5, 0.5))
|
| 539 |
+
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 540 |
+
st.image(img, caption=os.path.basename(file), use_container_width=True)
|
| 541 |
+
doc.close()
|
| 542 |
+
checkbox_key = f"asset_{file}"
|
| 543 |
+
st.session_state['asset_checkboxes'][checkbox_key] = st.checkbox(
|
| 544 |
+
"Use for SFT/Input",
|
| 545 |
+
value=st.session_state['asset_checkboxes'].get(checkbox_key, False),
|
| 546 |
+
key=checkbox_key
|
| 547 |
+
)
|
| 548 |
+
mime_type = "image/png" if file.endswith('.png') else "application/pdf"
|
| 549 |
+
st.markdown(get_download_link(file, mime_type, "Snag It! 📥"), unsafe_allow_html=True)
|
| 550 |
+
if st.button("Zap It! 🗑️", key=f"delete_{file}"):
|
| 551 |
+
os.remove(file)
|
| 552 |
+
del st.session_state['asset_checkboxes'][checkbox_key]
|
| 553 |
+
if file.endswith('.pdf'):
|
| 554 |
+
url_key = next((k for k, v in st.session_state['downloaded_pdfs'].items() if v == file), None)
|
| 555 |
+
if url_key:
|
| 556 |
+
del st.session_state['downloaded_pdfs'][url_key]
|
| 557 |
+
st.sidebar.success(f"Asset {os.path.basename(file)} vaporized! 💨")
|
| 558 |
+
st.rerun()
|
| 559 |
update_gallery()
|
| 560 |
|
| 561 |
st.sidebar.subheader("Model Management 🗂️")
|
|
|
|
| 662 |
status_text.text("Robo-Download complete! 🚀")
|
| 663 |
update_gallery()
|
| 664 |
|
| 665 |
+
mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 666 |
if st.button("Snapshot Selected 📸"):
|
| 667 |
+
selected_pdfs = [path for path in get_gallery_files() if path.endswith('.pdf') and st.session_state['asset_checkboxes'].get(f"asset_{path}", False)]
|
| 668 |
if selected_pdfs:
|
| 669 |
for pdf_path in selected_pdfs:
|
| 670 |
+
mode_key = {"Single Page (High-Res)": "single", "Two Pages (High-Res)": "twopage", "All Pages (High-Res)": "allpages"}[mode]
|
| 671 |
snapshots = asyncio.run(process_pdf_snapshot(pdf_path, mode_key))
|
| 672 |
for snapshot in snapshots:
|
| 673 |
st.image(Image.open(snapshot), caption=snapshot, use_container_width=True)
|
|
|
|
| 732 |
st.markdown(get_download_link(zip_path, "application/zip", "Download Fine-Tuned Titan"), unsafe_allow_html=True)
|
| 733 |
st.rerun()
|
| 734 |
elif isinstance(st.session_state['builder'], DiffusionBuilder):
|
| 735 |
+
selected_files = [path for path in get_gallery_files() if st.session_state['asset_checkboxes'].get(f"asset_{path}", False)]
|
| 736 |
+
if len(selected_files) >= 2:
|
| 737 |
+
demo_data = [{"image": file, "text": f"Asset {os.path.basename(file).split('.')[0]}"} for file in selected_files]
|
|
|
|
|
|
|
|
|
|
| 738 |
edited_data = st.data_editor(pd.DataFrame(demo_data), num_rows="dynamic")
|
| 739 |
if st.button("Fine-Tune with Dataset 🔄"):
|
| 740 |
images = [Image.open(row["image"]) if row["image"].endswith('.png') else Image.frombytes("RGB", fitz.open(row["image"])[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0)).size, fitz.open(row["image"])[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0)).samples) for _, row in edited_data.iterrows()]
|
|
|
|
| 787 |
status_container.empty()
|
| 788 |
elif isinstance(st.session_state['builder'], DiffusionBuilder):
|
| 789 |
test_prompt = st.text_area("Enter Test Prompt", "Neon Batman")
|
|
|
|
| 790 |
if st.button("Run Test ▶️"):
|
| 791 |
image = st.session_state['builder'].generate(test_prompt)
|
| 792 |
output_file = generate_filename("diffusion_test", "png")
|
|
|
|
| 829 |
|
| 830 |
with tab7:
|
| 831 |
st.header("Test OCR 🔍")
|
| 832 |
+
all_files = [path for path in get_gallery_files() if st.session_state['asset_checkboxes'].get(f"asset_{path}", False)]
|
|
|
|
|
|
|
| 833 |
if all_files:
|
| 834 |
selected_file = st.selectbox("Select Image or PDF", all_files, key="ocr_select")
|
| 835 |
if selected_file:
|
|
|
|
| 852 |
st.success(f"OCR output saved to {output_file}")
|
| 853 |
st.session_state['processing']['ocr'] = False
|
| 854 |
else:
|
| 855 |
+
st.warning("No images or PDFs selected yet. Check some boxes in the sidebar gallery!")
|
| 856 |
|
| 857 |
with tab8:
|
| 858 |
st.header("Test Image Gen 🎨")
|
| 859 |
+
all_files = [path for path in get_gallery_files() if st.session_state['asset_checkboxes'].get(f"asset_{path}", False)]
|
|
|
|
|
|
|
| 860 |
if all_files:
|
| 861 |
selected_file = st.selectbox("Select Image or PDF", all_files, key="gen_select")
|
| 862 |
if selected_file:
|
|
|
|
| 880 |
st.success(f"Image saved to {output_file}")
|
| 881 |
st.session_state['processing']['gen'] = False
|
| 882 |
else:
|
| 883 |
+
st.warning("No images or PDFs selected yet. Check some boxes in the sidebar gallery!")
|
| 884 |
|
| 885 |
with tab9:
|
| 886 |
st.header("Custom Diffusion 🎨🤓")
|
| 887 |
st.write("Unleash your inner artist with our tiny diffusion models!")
|
| 888 |
+
all_files = [path for path in get_gallery_files() if st.session_state['asset_checkboxes'].get(f"asset_{path}", False)]
|
|
|
|
|
|
|
| 889 |
if all_files:
|
| 890 |
st.subheader("Select Images or PDFs to Train")
|
| 891 |
selected_files = st.multiselect("Pick Images or PDFs", all_files, key="diffusion_select")
|
|
|
|
| 924 |
st.success(f"Image saved to {output_file}")
|
| 925 |
st.session_state['processing']['diffusion'] = False
|
| 926 |
else:
|
| 927 |
+
st.warning("No images or PDFs selected yet. Check some boxes in the sidebar gallery!")
|
| 928 |
|
| 929 |
update_gallery()
|