Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
|
|
|
|
|
| 1 |
# ================================================================
|
| 2 |
-
#
|
| 3 |
-
#
|
| 4 |
-
#
|
| 5 |
# ================================================================
|
| 6 |
-
#
|
| 7 |
-
# -------------------------
|
| 8 |
-
# PDF
|
| 9 |
-
# -------------------------
|
| 10 |
|
|
|
|
| 11 |
# To read the PDF
|
| 12 |
import PyPDF2
|
| 13 |
# To analyze the PDF layout and extract text
|
|
@@ -20,9 +19,19 @@ from PIL import Image
|
|
| 20 |
from pdf2image import convert_from_path
|
| 21 |
# To perform OCR to extract text from images
|
| 22 |
import pytesseract
|
|
|
|
| 23 |
# To remove the additional created files
|
| 24 |
import os
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# -----------------------------------------------------------------------------
|
| 27 |
# Create a function to extract text
|
| 28 |
|
|
@@ -263,12 +272,7 @@ pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
|
|
| 263 |
#
|
| 264 |
# =======================================
|
| 265 |
def sentence_to_audio(fileobj):
|
| 266 |
-
|
| 267 |
-
import torch
|
| 268 |
-
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 269 |
-
from transformers import pipeline
|
| 270 |
-
import numpy as np
|
| 271 |
-
import scipy
|
| 272 |
|
| 273 |
|
| 274 |
# text mining from pdf
|
|
@@ -308,38 +312,19 @@ def sentence_to_audio(fileobj):
|
|
| 308 |
scipy.io.wavfile.write("s_2_s.wav", rate=generated_audio["sampling_rate"], data=generated_audio["audio"].T)
|
| 309 |
return "s_2_s.wav",summary_text
|
| 310 |
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
# processor = AutoProcessor.from_pretrained("suno/bark-small")
|
| 315 |
-
# model = AutoModel.from_pretrained("suno/bark-small")
|
| 316 |
-
# inputs = processor(
|
| 317 |
-
# text=summary_text,
|
| 318 |
-
# return_tensors="pt",
|
| 319 |
-
# )
|
| 320 |
-
# speech_values = model.generate(**inputs, do_sample=True)
|
| 321 |
-
# sampling_rate = model.generation_config.sample_rate
|
| 322 |
-
# return sampling_rate, speech_values.cpu().numpy().squeeze(),summary_text
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
# ============================================================================================
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
# =======================================
|
| 329 |
-
|
| 330 |
-
import gradio as gr
|
| 331 |
-
from transformers import pipeline, AutoProcessor, AutoModel
|
| 332 |
-
from transformers import pipeline
|
| 333 |
-
|
| 334 |
# ===========================================================
|
| 335 |
|
| 336 |
#summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
|
| 337 |
-
#sentence_to_audio(summary_txt)
|
| 338 |
|
| 339 |
pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
|
| 340 |
#pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
|
| 341 |
pdf_path2=os.path.join(os.path.abspath(""), "Article_4_ExperimentalEvidence_on_the_Productivity_Effects_ of_Generative_ Artificial_Intelligence.pdf")
|
| 342 |
|
| 343 |
|
| 344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
demo.launch(share=True)
|
|
|
|
| 1 |
+
# https://huggingface.co/spaces/FlavioBF/AI_in_production_PRJs
|
| 2 |
+
|
| 3 |
# ================================================================
|
| 4 |
+
#
|
| 5 |
+
# import
|
| 6 |
+
#
|
| 7 |
# ================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
#PDF PROCESSING
|
| 10 |
# To read the PDF
|
| 11 |
import PyPDF2
|
| 12 |
# To analyze the PDF layout and extract text
|
|
|
|
| 19 |
from pdf2image import convert_from_path
|
| 20 |
# To perform OCR to extract text from images
|
| 21 |
import pytesseract
|
| 22 |
+
|
| 23 |
# To remove the additional created files
|
| 24 |
import os
|
| 25 |
|
| 26 |
+
#SUMMARIZATION AND AUDIO PROCESSING
|
| 27 |
+
import torch
|
| 28 |
+
import numpy as np
|
| 29 |
+
import scipy
|
| 30 |
+
import gradio as gr
|
| 31 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 32 |
+
from transformers import pipeline, AutoProcessor, AutoModel
|
| 33 |
+
from transformers import pipeline
|
| 34 |
+
|
| 35 |
# -----------------------------------------------------------------------------
|
| 36 |
# Create a function to extract text
|
| 37 |
|
|
|
|
| 272 |
#
|
| 273 |
# =======================================
|
| 274 |
def sentence_to_audio(fileobj):
|
| 275 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
|
| 278 |
# text mining from pdf
|
|
|
|
| 312 |
scipy.io.wavfile.write("s_2_s.wav", rate=generated_audio["sampling_rate"], data=generated_audio["audio"].T)
|
| 313 |
return "s_2_s.wav",summary_text
|
| 314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
# ===========================================================
|
| 316 |
|
| 317 |
#summary_txt="It is dangerous to think of machine learning as a free-to-use toolkit, as it is common to incur ongoing maintenance costs in real-world ML systems"
|
|
|
|
| 318 |
|
| 319 |
pdf_path=os.path.join(os.path.abspath(""), "hidden-technical-debt-in-machine-learning-systems-Paper.pdf")
|
| 320 |
#pdf_path2=os.path.join(os.path.abspath(""), "1812_05944.pdf")
|
| 321 |
pdf_path2=os.path.join(os.path.abspath(""), "Article_4_ExperimentalEvidence_on_the_Productivity_Effects_ of_Generative_ Artificial_Intelligence.pdf")
|
| 322 |
|
| 323 |
|
| 324 |
+
|
| 325 |
+
#iface = gr.Interface(fn=sentence_to_audio, inputs="file", outputs=["audio",gr.Textbox(lines=4,label="one sentence summ.")],title="SINGLE SENTENCE SUMMARY TO AUDIO CONVERSIONE (upload only pdf files with Abstract section)")
|
| 326 |
+
#iface.launch(share=True)
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
# Build the Gradio UI around sentence_to_audio: one uploaded file in; the
# generated audio file and the one-sentence summary text out. The two PDF
# paths defined above are offered as clickable examples.
# NOTE: the previous version had a doubled comma in the `outputs` list
# (`["audio",,gr.Textbox(...)]`), which is a SyntaxError and prevented the
# app from starting at all.
demo = gr.Interface(
    fn=sentence_to_audio,
    inputs="file",
    outputs=["audio", gr.Textbox(lines=4, label="one sentence summ.")],
    examples=[pdf_path, pdf_path2],
    title="SINGLE SENTENCE SUMMARY TO AUDIO CONVERSIONE (upload only pdf files with Abstract section)",
)
|
| 330 |
demo.launch(share=True)
|