Spaces:

Alioth86
/

SpeechAbstractor

Runtime error

Alioth86 committed on Dec 5, 2023

Commit

912db67

1 Parent(s): 8c468f3

Add application file

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,6 +14,7 @@ import re
 import torch
 import transformers
 from transformers import pipeline
 from datasets import load_dataset
 import soundfile as sf
 from IPython.display import Audio
@@ -144,9 +145,15 @@ def main_function(uploaded_filepath):
         text_per_pagy[key] = cleaned_text
     abstract_text = extract_abstract(text_per_pagy)
     #abstract the summary with my pipeline and model, deciding the length
     summarizer = pipeline("summarization", model="pszemraj/long-t5-tglobal-base-sci-simplify")
-    summary = summarizer(abstract_text, max_length=65, do_sample=False)[0]['summary_text']
     #generating the audio from the text, with my pipeline and model
     synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
@@ -159,7 +166,7 @@ def main_function(uploaded_filepath):
     sf.write(audio_file_path, speech["audio"], samplerate=speech["sampling_rate"])
     #the function returns the 2 pieces we need
-    return summary, audio_file_path
 #let's communicate with gradio what it has to put in
 iface = gr.Interface(

 import torch
 import transformers
 from transformers import pipeline
+import nltk
 from datasets import load_dataset
 import soundfile as sf
 from IPython.display import Audio
         text_per_pagy[key] = cleaned_text
     abstract_text = extract_abstract(text_per_pagy)
+    nltk.download('punkt')
     #abstract the summary with my pipeline and model, deciding the length
     summarizer = pipeline("summarization", model="pszemraj/long-t5-tglobal-base-sci-simplify")
+    summary = summarizer(abstract_text, max_length=100, do_sample=False)[0]['summary_text']
+    #keeping just the first sentence, to be sure.
+    sentences = nltk.tokenize.sent_tokenize(summary)
+    first_sentence = sentences[0]
     #generating the audio from the text, with my pipeline and model
     synthesiser = pipeline("text-to-speech", model="microsoft/speecht5_tts")
     sf.write(audio_file_path, speech["audio"], samplerate=speech["sampling_rate"])
     #the function returns the 2 pieces we need
+    return first_sentence, audio_file_path
 #let's communicate with gradio what it has to put in
 iface = gr.Interface(