Spaces:
Running
Running
| # app.py | |
| import gradio as gr | |
| from extract_text_from_pdf import PDFTextExtractor | |
| from generate_transcript import TranscriptProcessor | |
| #from generate_audio import TTSGenerator | |
| from generate_audio_edgetts import EdgeTTSGenerator | |
| import pickle | |
| import os | |
| import tempfile | |
| import shutil | |
| #import spaces | |
| import asyncio | |
def create_temp_session_directory():
    """Create a fresh, uniquely named temporary directory and return its path.

    The directory is not removed automatically; whoever requests it owns
    its cleanup.
    """
    session_path = tempfile.mkdtemp()
    return session_path
| # Combined function to perform all steps sequentially | |
| #@spaces.GPU(duration=120) | |
def process_pdf_to_podcast(pdf_file):
    """Run steps 1-3 of the pipeline for an uploaded PDF.

    The upload is copied into a fresh per-session temporary directory, its
    text is extracted and cleaned, a transcript is generated from that text,
    and the transcript is rewritten into a TTS-ready form.

    Args:
        pdf_file: Path of the uploaded PDF as delivered by the ``gr.File``
            input. An object that exposes the path through a ``name``
            attribute is accepted as well.

    Returns:
        A 5-tuple ``(status, text_preview, transcript_preview,
        tts_ready_preview, session_dir)``. ``text_preview`` is the first 500
        characters of the cleaned text; the two transcript previews are the
        objects unpickled from the paths returned by
        ``TranscriptProcessor.generate_transcript()`` and
        ``TranscriptProcessor.rewrite_transcript()``; ``session_dir`` is the
        directory that holds every intermediate file of this run.

    Raises:
        gr.Error: If no file was uploaded.
    """
    # Fail with a readable message instead of the TypeError that
    # shutil.copy(None, ...) would raise.
    if not pdf_file:
        raise gr.Error("Please upload a PDF file before running steps 1-3.")

    # NOTE(review): some Gradio releases may hand over a file-like wrapper
    # rather than a plain path; its on-disk location is then taken from
    # `.name`. Real paths are used untouched (a pathlib.Path also has a
    # `.name`, but that is only the basename). Confirm against the Gradio
    # version in use.
    if isinstance(pdf_file, (str, os.PathLike)):
        source_path = pdf_file
    else:
        source_path = getattr(pdf_file, "name", pdf_file)

    session_dir = create_temp_session_directory()
    try:
        # Define paths within the session directory
        pdf_path = os.path.join(session_dir, "uploaded_pdf.pdf")
        clean_text_path = os.path.join(session_dir, "clean_text.txt")
        transcript_path = os.path.join(session_dir, "data.pkl")
        tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
        text_model = "llama3-70b-8192"

        # Step 1: Extract Text from PDF
        shutil.copy(source_path, pdf_path)
        extractor = PDFTextExtractor(pdf_path, clean_text_path)
        clean_text_path = extractor.clean_and_save_text()

        # Only a short preview of the extracted text is shown in the UI.
        with open(clean_text_path, 'r', encoding='utf-8') as file:
            text_preview = file.read(500)

        # Step 2: Generate Transcript
        processor = TranscriptProcessor(
            clean_text_path, transcript_path, tts_ready_path, text_model
        )
        transcript_path = processor.generate_transcript()

        # NOTE(review): pickle is only safe here if these files are always
        # written by the pipeline itself inside session_dir -- confirm.
        with open(transcript_path, 'rb') as f:
            transcript_preview = pickle.load(f)

        # Step 3: Rewrite Transcript for TTS
        tts_ready_path = processor.rewrite_transcript()

        # Load the rewritten transcript for preview and editing
        with open(tts_ready_path, 'rb') as f:
            tts_ready_preview = pickle.load(f)
    except Exception:
        # Do not leave a half-populated session directory behind when any
        # step fails; the caller never receives its path in that case.
        shutil.rmtree(session_dir, ignore_errors=True)
        raise

    return (
        "Steps 1-3 completed. Preview and adjust the rewritten transcript if needed.",
        text_preview,
        transcript_preview,
        tts_ready_preview,
        session_dir,
    )
| # Final Step: Generate Audio after optional adjustments | |
| #@spaces.GPU(duration=300) | |
def generate_audio_from_modified_text(tts_ready_text, session_dir):
    """Generate the podcast audio from the (possibly edited) transcript.

    The transcript is pickled to ``podcast_ready_data.pkl`` inside the
    session directory and handed to ``EdgeTTSGenerator``, which is asked to
    write ``final_podcast_audio.mp3`` in the same directory.

    Args:
        tts_ready_text: TTS-ready transcript as currently held by the
            editable transcript component.
        session_dir: Session directory returned by steps 1-3. A new
            temporary directory is created when it is missing or no longer
            exists on disk.

    Returns:
        A 2-tuple ``(status, audio_path)`` where ``audio_path`` is whatever
        ``EdgeTTSGenerator.generate_audio()`` returned.

    Raises:
        gr.Error: If the transcript is empty.
    """
    # Reject an empty transcript up front instead of pickling it and handing
    # it to the TTS backend.
    if not tts_ready_text or not str(tts_ready_text).strip():
        raise gr.Error(
            "The transcript is empty. Run steps 1-3 or enter a transcript "
            "before generating audio."
        )

    # A stored path whose directory has disappeared is as unusable as no
    # path at all: open(..., 'wb') below would fail on it.
    if not session_dir or not os.path.isdir(session_dir):
        session_dir = create_temp_session_directory()

    tts_ready_path = os.path.join(session_dir, "podcast_ready_data.pkl")
    audio_output_path = os.path.join(session_dir, "final_podcast_audio.mp3")

    # Save any modified TTS-ready transcript
    with open(tts_ready_path, 'wb') as f:
        pickle.dump(tts_ready_text, f)

    # generate_audio() is awaited through asyncio.run, i.e. it is driven to
    # completion before this function returns.
    tts_gen = EdgeTTSGenerator(tts_ready_path, audio_output_path)
    audio_path = asyncio.run(tts_gen.generate_audio())
    return f"Step 4 complete. Audio saved to {audio_path}.", audio_path
| # Gradio Interface | |
| # Builds the UI: an uploaded PDF is passed to process_pdf_to_podcast, whose | |
| # outputs fill the status box, the two read-only previews, the editable | |
| # transcript and the session state; the edited transcript is then passed to | |
| # generate_audio_from_modified_text to fill the status box and audio player. | |
| # NOTE(review): indentation is not preserved in this copy of the file, so | |
| # which components sit inside gr.Row() cannot be determined here -- confirm | |
| # against the original layout before restructuring. | |
| with gr.Blocks() as app: | |
| gr.Markdown("# PDF to Podcast Conversion Application") | |
| # Single-click initiation of Steps 1-3 | |
| with gr.Row(): | |
| pdf_input = gr.File(label="Upload PDF") | |
| run_all_button = gr.Button("Run All Steps (1-3)") | |
| output_status = gr.Textbox(label="Status") | |
| # Step 1 Preview of Extracted Text | |
| extracted_text_preview = gr.Textbox(label="Extracted Text Preview (First 500 Characters)", interactive=False) | |
| # Step 2 Preview of Generated Transcript | |
| transcript_preview = gr.Textbox(label="Generated Transcript Preview", interactive=False) | |
| # Step 3 Editable Rewritten Transcript for TTS | |
| tts_ready_preview = gr.Textbox(label="Editable Rewritten Transcript for TTS", interactive=True) | |
| # Button for generating audio with editable transcript | |
| generate_audio_button = gr.Button("Generate Audio from Edited Transcript") | |
| final_audio_output = gr.Audio(label="Generated Podcast Audio") | |
| # Static HTML badge linking to groq.com (the variable name says "gradio", | |
| # but the markup below is the Groq badge). | |
| powered_by_gradio=gr.HTML("""<div style="width: 100px; height: auto; text-align: center;"> | |
| <a href="https://groq.com" target="_blank" rel="noopener noreferrer"> | |
| <img | |
| src="https://groq.com/wp-content/uploads/2024/03/PBG-mark1-color.svg" | |
| alt="Powered by Groq for fast inference." | |
| style="width: 100%; height: auto;" | |
| /> | |
| </a> | |
| </div>""") | |
| # State slot that carries the session directory written by the first | |
| # handler into the second handler. | |
| session_dir = gr.State() | |
| # Step 1-3 execution | |
| run_all_button.click( | |
| process_pdf_to_podcast, | |
| inputs=pdf_input, | |
| outputs=[output_status, extracted_text_preview, transcript_preview, tts_ready_preview,session_dir] | |
| ) | |
| # Final step: Generate Audio from modified TTS-ready transcript | |
| generate_audio_button.click( | |
| generate_audio_from_modified_text, | |
| inputs=[tts_ready_preview, session_dir], | |
| outputs=[output_status, final_audio_output] | |
| ) | |
| # Launches the app when this module is executed (no __main__ guard). | |
| app.launch() | |