Spaces:
Sleeping
Sleeping
| # generate_transcript.py | |
| import pickle | |
| from tqdm import tqdm | |
| import warnings | |
| from groq import Groq | |
| import os | |
| import re | |
| warnings.filterwarnings('ignore') | |
class TranscriptProcessor:
    """
    Generate and rewrite podcast-style transcripts using a Groq-hosted language model.

    The workflow has two steps: ``generate_transcript`` turns the cleaned source
    text into a first-draft dialogue, and ``rewrite_transcript`` asks the model
    to re-write that draft as a list of ``(speaker, line)`` tuples. Both results
    are stored as pickle files at the paths given to the constructor.
    """

    # Tried in order. utf-8 and cp1252 both reject byte sequences they cannot
    # represent; latin-1 accepts every byte, so it must come last or any
    # encoding listed after it can never be reached.
    _ENCODINGS = ('utf-8', 'cp1252', 'latin-1')

    def __init__(self, text_file_path,transcript_output_path,tts_output_path, model_name="llama3-70b-8192"):
        """
        Store the file locations, the model name and the two system prompts.

        Args:
            text_file_path (str): Path to the file containing cleaned PDF text.
            transcript_output_path (str): Path where the first-draft transcript is pickled.
            tts_output_path (str): Path where the rewritten transcript is pickled.
            model_name (str): Name of the language model to use.
        """
        self.text_file_path = text_file_path
        self.transcript_output_path = transcript_output_path
        self.tts_output_path = tts_output_path
        self.model_name = model_name
        # System prompt for the first pass (source text -> draft dialogue).
        self.transcript_prompt = """
You are the a world-class podcast writer, you have worked as a ghost writer for Joe Rogan, Lex Fridman, Ben Shapiro, Tim Ferris.
We are in an alternate universe where actually you have been writing every line they say and they just stream it into their brains.
You have won multiple podcast awards for your writing.
Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic.
Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc
Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes
Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions
Make sure the tangents speaker 2 provides are quite wild or interesting.
It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait
ALWAYS START YOUR RESPONSE DIRECTLY WITH SPEAKER 1:
DO NOT GIVE EPISODE TITLES SEPERATELY, LET SPEAKER 1 TITLE IT IN HER SPEECH
DO NOT GIVE CHAPTER TITLES
IT SHOULD STRICTLY BE THE DIALOGUES
"""
        # System prompt for the second pass (draft dialogue -> list of tuples).
        self.rewrite_prompt = """
You are an international oscar winnning screenwriter
You have been working with multiple award winning podcasters.
Your job is to use the podcast transcript written below to re-write it for an AI Text-To-Speech Pipeline. A very dumb AI had written this so you have to step up for your kind.
Make it as engaging as possible, Speaker 1 and 2 will be simulated by different voice engines
Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc
Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes
Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions
Make sure the tangents speaker 2 provides are quite wild or interesting.
REMEMBER THIS WITH YOUR HEART
It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait
Please re-write to make it as characteristic as possible
START YOUR RESPONSE DIRECTLY WITH SPEAKER 1:
STRICTLY RETURN YOUR RESPONSE AS A LIST OF TUPLES OK?
IT WILL START DIRECTLY WITH THE LIST AND END WITH THE LIST NOTHING ELSE
Example of response:
[
("Speaker 1", "Welcome to our podcast, where we explore the latest advancements in AI and technology. I'm your host, and today we're joined by a renowned expert in the field of AI. We're going to dive into the exciting world of Llama 3.2, the latest release from Meta AI."),
("Speaker 2", "Hi, I'm excited to be here! So, what is Llama 3.2?"),
("Speaker 1", "Ah, great question! Llama 3.2 is an open-source AI model that allows developers to fine-tune, distill, and deploy AI models anywhere. It's a significant update from the previous version, with improved performance, efficiency, and customization options."),
("Speaker 2", "That sounds amazing! What are some of the key features of Llama 3.2?")
]
"""

    def load_text(self):
        """
        Read the cleaned text file, trying several encodings in turn.

        Returns:
            str: Content of the cleaned text file, or None if the file does
            not exist or could not be decoded.
        """
        for encoding in self._ENCODINGS:
            try:
                with open(self.text_file_path, 'r', encoding=encoding) as file:
                    content = file.read()
            except FileNotFoundError:
                # A missing file will be missing for every encoding; report it
                # as such instead of as a decoding failure.
                print(f"Error: File '{self.text_file_path}' not found.")
                return None
            except UnicodeDecodeError:
                continue
            print(f"Successfully read file using {encoding} encoding.")
            return content
        print(f"Error: Could not decode file '{self.text_file_path}' with any common encoding.")
        return None

    def _chat(self, system_prompt, user_content):
        """Send one system + user exchange to the model and return the reply text."""
        client = Groq(
            api_key=os.environ.get("GROQ_API_KEY"),
        )
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            model=self.model_name,
        )
        return chat_completion.choices[0].message.content

    def generate_transcript(self):
        """
        Generate a podcast-style transcript and save it as a pickled file.

        Returns:
            str: Path to the file where the transcript is saved, or None if
            the input text could not be loaded.
        """
        input_text = self.load_text()
        if input_text is None:
            return None
        transcript = self._chat(self.transcript_prompt, input_text)
        # Save the transcript as a pickle file
        with open(self.transcript_output_path, 'wb') as f:
            pickle.dump(transcript, f)
        return self.transcript_output_path

    def extract_tuple(self, text):
        """
        Return the bracketed list found in *text*, as a string.

        The match runs from the first ``[`` to the last ``]`` (greedy, across
        newlines), so any chatter before or after the list is dropped. The
        result is the raw substring; it is not parsed into Python objects.

        Returns:
            str: The ``[...]`` substring, or None if *text* is empty/None or
            contains no bracketed span.
        """
        if not text:
            return None
        match = re.search(r'\[.*\]', text, re.DOTALL)
        return match.group(0) if match else None

    def rewrite_transcript(self):
        """
        Refine the saved transcript for TTS and save the result as a pickled file.

        The value pickled is the list-of-tuples text extracted from the model
        reply (a string, see ``extract_tuple``).

        Returns:
            str: Path to the file where the TTS-ready transcript is saved, or
            None if the model reply contained no list (nothing is written then).
        """
        # Load the initial generated transcript.
        # NOTE: pickle.load can execute arbitrary code; this is only acceptable
        # for a file this class wrote itself via generate_transcript.
        with open(self.transcript_output_path, 'rb') as file:
            input_transcript = pickle.load(file)
        response = self._chat(self.rewrite_prompt, input_transcript)
        rewritten_transcript = self.extract_tuple(response)
        if rewritten_transcript is None:
            # Do not pickle None: it would only fail later, further from the cause.
            print("Error: Model response did not contain a list of tuples; TTS transcript was not saved.")
            return None
        # Save the rewritten transcript as a pickle file
        with open(self.tts_output_path, 'wb') as f:
            pickle.dump(rewritten_transcript, f)
        return self.tts_output_path