prithivMLmods commited on
Commit
a9ff218
·
verified ·
1 Parent(s): 4c61415

update app

Browse files
Files changed (1) hide show
  1. app.py +20 -152
app.py CHANGED
@@ -4,7 +4,7 @@ import math
4
  import os
5
  import traceback
6
  from io import BytesIO
7
- from typing import Any, Dict, List, Optional, Tuple, Iterable
8
  import re
9
  import time
10
  from threading import Thread
@@ -21,6 +21,7 @@ import numpy as np
21
  import torchvision.transforms as T
22
  from torchvision.transforms.functional import InterpolationMode
23
 
 
24
  from transformers import (
25
  Qwen2_5_VLForConditionalGeneration,
26
  Qwen2VLForConditionalGeneration,
@@ -42,144 +43,6 @@ from reportlab.lib.styles import getSampleStyleSheet
42
  from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
43
  from reportlab.lib.units import inch
44
 
45
- from gradio.themes import Soft
46
- from gradio.themes.utils import colors, fonts, sizes
47
-
48
-
49
- # --- Theme and CSS Definition ---
50
-
51
- # Define the Thistle color palette
52
- colors.thistle = colors.Color(
53
- name="thistle",
54
- c50="#F9F5F9",
55
- c100="#F0E8F1",
56
- c200="#E7DBE8",
57
- c300="#DECEE0",
58
- c400="#D2BFD8",
59
- c500="#D8BFD8", # Thistle base color
60
- c600="#B59CB7",
61
- c700="#927996",
62
- c800="#6F5675",
63
- c900="#4C3454",
64
- c950="#291233",
65
- )
66
-
67
- colors.red_gray = colors.Color(
68
- name="red_gray",
69
- c50="#f7eded", c100="#f5dcdc", c200="#efb4b4", c300="#e78f8f",
70
- c400="#d96a6a", c500="#c65353", c600="#b24444", c700="#8f3434",
71
- c800="#732d2d", c900="#5f2626", c950="#4d2020",
72
- )
73
-
74
- class ThistleTheme(Soft):
75
- def __init__(
76
- self,
77
- *,
78
- primary_hue: colors.Color | str = colors.gray,
79
- secondary_hue: colors.Color | str = colors.thistle, # Use the new color
80
- neutral_hue: colors.Color | str = colors.slate,
81
- text_size: sizes.Size | str = sizes.text_lg,
82
- font: fonts.Font | str | Iterable[fonts.Font | str] = (
83
- fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
84
- ),
85
- font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
86
- fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
87
- ),
88
- ):
89
- super().__init__(
90
- primary_hue=primary_hue,
91
- secondary_hue=secondary_hue,
92
- neutral_hue=neutral_hue,
93
- text_size=text_size,
94
- font=font,
95
- font_mono=font_mono,
96
- )
97
- super().set(
98
- background_fill_primary="*primary_50",
99
- background_fill_primary_dark="*primary_900",
100
- body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
101
- body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
102
- button_primary_text_color="black",
103
- button_primary_text_color_hover="white",
104
- button_primary_background_fill="linear-gradient(90deg, *secondary_400, *secondary_500)",
105
- button_primary_background_fill_hover="linear-gradient(90deg, *secondary_500, *secondary_600)",
106
- button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_700)",
107
- button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_600)",
108
- button_secondary_text_color="black",
109
- button_secondary_text_color_hover="white",
110
- button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
111
- button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
112
- button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
113
- button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
114
- slider_color="*secondary_400",
115
- slider_color_dark="*secondary_600",
116
- block_title_text_weight="600",
117
- block_border_width="3px",
118
- block_shadow="*shadow_drop_lg",
119
- button_primary_shadow="*shadow_drop_lg",
120
- button_large_padding="11px",
121
- color_accent_soft="*primary_100",
122
- block_label_background_fill="*primary_200",
123
- )
124
-
125
- # Instantiate the new theme
126
- thistle_theme = ThistleTheme()
127
-
128
- css = """
129
- #main-title h1 {
130
- font-size: 2.3em !important;
131
- }
132
- #output-title h2 {
133
- font-size: 2.1em !important;
134
- }
135
- :root {
136
- --color-grey-50: #f9fafb;
137
- --banner-background: var(--secondary-400);
138
- --banner-text-color: var(--primary-100);
139
- --banner-background-dark: var(--secondary-800);
140
- --banner-text-color-dark: var(--primary-100);
141
- --banner-chrome-height: calc(16px + 43px);
142
- --chat-chrome-height-wide-no-banner: 320px;
143
- --chat-chrome-height-narrow-no-banner: 450px;
144
- --chat-chrome-height-wide: calc(var(--chat-chrome-height-wide-no-banner) + var(--banner-chrome-height));
145
- --chat-chrome-height-narrow: calc(var(--chat-chrome-height-narrow-no-banner) + var(--banner-chrome-height));
146
- }
147
- .banner-message { background-color: var(--banner-background); padding: 5px; margin: 0; border-radius: 5px; border: none; }
148
- .banner-message-text { font-size: 13px; font-weight: bolder; color: var(--banner-text-color) !important; }
149
- body.dark .banner-message { background-color: var(--banner-background-dark) !important; }
150
- body.dark .gradio-container .contain .banner-message .banner-message-text { color: var(--banner-text-color-dark) !important; }
151
- .toast-body { background-color: var(--color-grey-50); }
152
- .html-container:has(.css-styles) { padding: 0; margin: 0; }
153
- .css-styles { height: 0; }
154
- .model-message { text-align: end; }
155
- .model-dropdown-container { display: flex; align-items: center; gap: 10px; padding: 0; }
156
- .user-input-container .multimodal-textbox{ border: none !important; }
157
- .control-button { height: 51px; }
158
- button.cancel { border: var(--button-border-width) solid var(--button-cancel-border-color); background: var(--button-cancel-background-fill); color: var(--button-cancel-text-color); box-shadow: var(--button-cancel-shadow); }
159
- button.cancel:hover, .cancel[disabled] { background: var(--button-cancel-background-fill-hover); color: var(--button-cancel-text-color-hover); }
160
- .opt-out-message { top: 8px; }
161
- .opt-out-message .html-container, .opt-out-checkbox label { font-size: 14px !important; padding: 0 !important; margin: 0 !important; color: var(--neutral-400) !important; }
162
- div.block.chatbot { height: calc(100svh - var(--chat-chrome-height-wide)) !important; max-height: 900px !important; }
163
- div.no-padding { padding: 0 !important; }
164
- #gallery { min-height: 400px; }
165
- @media (max-width: 1280px) { div.block.chatbot { height: calc(100svh - var(--chat-chrome-height-wide)) !important; } }
166
- @media (max-width: 1024px) {
167
- .responsive-row { flex-direction: column; }
168
- .model-message { text-align: start; font-size: 10px !important; }
169
- .model-dropdown-container { flex-direction: column; align-items: flex-start; }
170
- div.block.chatbot { height: calc(100svh - var(--chat-chrome-height-narrow)) !important; }
171
- }
172
- @media (max-width: 400px) {
173
- .responsive-row { flex-direction: column; }
174
- .model-message { text-align: start; font-size: 10px !important; }
175
- .model-dropdown-container { flex-direction: column; align-items: flex-start; }
176
- div.block.chatbot { max-height: 360px !important; }
177
- }
178
- @media (max-height: 932px) { .chatbot { max-height: 500px !important; } }
179
- @media (max-height: 1280px) { div.block.chatbot { max-height: 800px !important; } }
180
- """
181
-
182
-
183
  # --- Constants and Model Setup ---
184
  MAX_INPUT_TOKEN_LENGTH = 4096
185
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -551,11 +414,16 @@ def process_document_stream(
551
  # --- Gradio UI Definition ---
552
  def create_gradio_interface():
553
  """Builds and returns the Gradio web interface."""
554
-
555
- with gr.Blocks(theme=thistle_theme, css=css) as demo:
 
 
 
 
 
556
  gr.HTML("""
557
  <div class="title" style="text-align: center">
558
- <h1 id="main-title">Tiny VLMs Lab🧪</h1>
559
  <p style="font-size: 1.1em; color: #6b7280; margin-bottom: 0.6em;">
560
  Tiny VLMs for Image Content Extraction and Understanding
561
  </p>
@@ -572,14 +440,14 @@ def create_gradio_interface():
572
  "Qwen2.5-VL-3B-Abliterated-Caption-it(caption)", "Nanonets-OCR-s(ocr)",
573
  "LMM-R1-MGT-PerceReason(reason)", "OCRFlux-3B(ocr)", "TBAC-VLR1-3B(open-r1)",
574
  "SmolVLM-500M-Instruct(smol)", "llava-onevision-qwen2-0.5b-ov-hf(mini)"],
575
- label="Select Model", value= "Megalodon-OCR-Sync-0713(ocr)"
576
  )
577
 
578
- prompt_input = gr.Textbox(label="Query Input", placeholder="Enter your query here...")
579
- image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'], height=290)
580
 
581
  with gr.Accordion("Advanced Settings (PDF)", open=False):
582
- max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=1024, step=256, label="Max New Tokens")
583
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
584
  top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
585
  top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
@@ -591,22 +459,22 @@ def create_gradio_interface():
591
  alignment = gr.Dropdown(choices=["Left", "Center", "Right", "Justified"], value="Justified", label="Text Alignment")
592
  image_size = gr.Dropdown(choices=["Small", "Medium", "Large"], value="Medium", label="Image Size in PDF")
593
 
594
- with gr.Row():
595
- process_btn = gr.Button("Process Image", variant="primary", size="lg", scale=2)
596
- clear_btn = gr.Button("Clear All", variant="secondary", scale=1)
597
 
598
  # Right Column (Outputs)
599
  with gr.Column(scale=2):
600
  with gr.Tabs() as tabs:
601
  with gr.Tab("📝 Extracted Content"):
602
- raw_output_stream = gr.Textbox(label="Raw Output Stream", interactive=False, lines=15, show_copy_button=True)
603
  with gr.Row():
604
  examples = gr.Examples(
605
  examples=["examples/1.png", "examples/2.png", "examples/3.png",
606
  "examples/4.png", "examples/5.png", "examples/6.png"],
607
  inputs=image_input, label="Examples"
608
  )
609
-
 
610
  with gr.Tab("📰 README.md"):
611
  with gr.Accordion("(Result.md)", open=True):
612
  markdown_output = gr.Markdown()
@@ -641,4 +509,4 @@ def create_gradio_interface():
641
  if __name__ == "__main__":
642
  demo = create_gradio_interface()
643
 
644
- demo.queue(max_size=50).launch(ssr_mode=False, mcp_server=True, show_error=True)
 
4
  import os
5
  import traceback
6
  from io import BytesIO
7
+ from typing import Any, Dict, List, Optional, Tuple
8
  import re
9
  import time
10
  from threading import Thread
 
21
  import torchvision.transforms as T
22
  from torchvision.transforms.functional import InterpolationMode
23
 
24
+
25
  from transformers import (
26
  Qwen2_5_VLForConditionalGeneration,
27
  Qwen2VLForConditionalGeneration,
 
43
  from reportlab.platypus import SimpleDocTemplate, Image as RLImage, Paragraph, Spacer
44
  from reportlab.lib.units import inch
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # --- Constants and Model Setup ---
47
  MAX_INPUT_TOKEN_LENGTH = 4096
48
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
414
  # --- Gradio UI Definition ---
415
  def create_gradio_interface():
416
  """Builds and returns the Gradio web interface."""
417
+ css = """
418
+ .main-container { max-width: 1400px; margin: 0 auto; }
419
+ .process-button { border: none !important; color: white !important; font-weight: bold !important; background-color: blue !important;}
420
+ .process-button:hover { background-color: darkblue !important; transform: translateY(-2px) !important; box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important; }
421
+ #gallery { min-height: 400px; }
422
+ """
423
+ with gr.Blocks(theme="bethecloud/storj_theme", css=css) as demo:
424
  gr.HTML("""
425
  <div class="title" style="text-align: center">
426
+ <h1>Tiny VLMs Lab🧪</h1>
427
  <p style="font-size: 1.1em; color: #6b7280; margin-bottom: 0.6em;">
428
  Tiny VLMs for Image Content Extraction and Understanding
429
  </p>
 
440
  "Qwen2.5-VL-3B-Abliterated-Caption-it(caption)", "Nanonets-OCR-s(ocr)",
441
  "LMM-R1-MGT-PerceReason(reason)", "OCRFlux-3B(ocr)", "TBAC-VLR1-3B(open-r1)",
442
  "SmolVLM-500M-Instruct(smol)", "llava-onevision-qwen2-0.5b-ov-hf(mini)"],
443
+ label="Select Model", value= "LFM2-VL-450M(fast)"
444
  )
445
 
446
+ prompt_input = gr.Textbox(label="Query Input", placeholder="✦︎ Enter the prompt")
447
+ image_input = gr.Image(label="Upload Image", type="pil", sources=['upload'])
448
 
449
  with gr.Accordion("Advanced Settings (PDF)", open=False):
450
+ max_new_tokens = gr.Slider(minimum=512, maximum=8192, value=2048, step=256, label="Max New Tokens")
451
  temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
452
  top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
453
  top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
 
459
  alignment = gr.Dropdown(choices=["Left", "Center", "Right", "Justified"], value="Justified", label="Text Alignment")
460
  image_size = gr.Dropdown(choices=["Small", "Medium", "Large"], value="Medium", label="Image Size in PDF")
461
 
462
+ process_btn = gr.Button("🚀 Process Image", variant="primary", elem_classes=["process-button"], size="lg")
463
+ clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
 
464
 
465
  # Right Column (Outputs)
466
  with gr.Column(scale=2):
467
  with gr.Tabs() as tabs:
468
  with gr.Tab("📝 Extracted Content"):
469
+ raw_output_stream = gr.Textbox(label="Raw Model Output Stream", interactive=False, lines=15, show_copy_button=True)
470
  with gr.Row():
471
  examples = gr.Examples(
472
  examples=["examples/1.png", "examples/2.png", "examples/3.png",
473
  "examples/4.png", "examples/5.png", "examples/6.png"],
474
  inputs=image_input, label="Examples"
475
  )
476
+ gr.Markdown("[Report-Bug💻](https://huggingface.co/spaces/prithivMLmods/Tiny-VLMs-Lab/discussions) | [prithivMLmods🤗](https://huggingface.co/prithivMLmods)")
477
+
478
  with gr.Tab("📰 README.md"):
479
  with gr.Accordion("(Result.md)", open=True):
480
  markdown_output = gr.Markdown()
 
509
  if __name__ == "__main__":
510
  demo = create_gradio_interface()
511
 
512
+ demo.queue(max_size=50).launch(mcp_server=True, ssr_mode=False, show_error=True)