Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import random | |
| import os | |
| import re | |
| import requests | |
| from huggingface_hub import InferenceClient | |
| from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound | |
| from fpdf import FPDF | |
| from fpdf.enums import XPos, YPos | |
| from datetime import datetime | |
# RapidAPI settings: the host is fixed, the key is read from the environment
# (it is None when the RAPIDAPI_KEY variable is not set).
RAPIDAPI_HOST = "youtube-transcriptor.p.rapidapi.com"
RAPIDAPI_KEY = os.environ.get("RAPIDAPI_KEY")
# Client factory.
def create_client(model_name):
    """Return an ``InferenceClient`` for *model_name*.

    The access token is read from the ``HF_TOKEN`` environment variable.
    """
    hf_token = os.getenv("HF_TOKEN")
    return InferenceClient(model_name, token=hf_token)


# Module-level client shared by call_api.
client = create_client("CohereForAI/c4ai-command-r-plus")
# Chat-completion helper.
def call_api(content, system_message, max_tokens, temperature, top_p):
    """Send one system + user exchange to the shared client and return the reply text.

    A new random seed in [0, 1000000] is drawn for every call.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    seed = random.randint(0, 1000000)
    completion = client.chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        seed=seed,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# YouTube video-id extraction.
def get_video_id(youtube_url):
    """Return the video id embedded in *youtube_url*, or ``None`` if none is found.

    Recognised forms, tried in this order:
      * ``...?v=<id>`` / ``...&v=<id>`` (watch URLs)
      * ``youtu.be/<id>`` (short links)
      * ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``

    ``None`` or empty input, and URLs whose id part is empty, yield ``None``.
    """
    if not youtube_url:
        return None

    patterns = (
        # Require '?' or '&' before 'v=' so parameters such as 'rev=' don't match.
        r"(?<=[?&]v=)[^#&?]+",
        # The dot is escaped so only the literal host 'youtu.be' matches.
        r"(?<=youtu\.be/)[^#&?/]+",
        r"(?<=/shorts/)[^#&?/]+",
        r"(?<=/embed/)[^#&?/]+",
        r"(?<=/live/)[^#&?/]+",
    )
    url = youtube_url.strip()
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(0)
    return None
# YouTube transcript retrieval.
def get_transcript(youtube_url):
    """Fetch the English transcript for *youtube_url* through the RapidAPI endpoint.

    Always returns a string: either ``"Transcript:\\n\\n<text>"`` on success or a
    human-readable error message (invalid URL, missing API key, request
    failure, undecodable body, or an empty/unexpected payload). Nothing is raised
    for those conditions.
    """
    video_id = get_video_id(youtube_url)
    if not video_id:
        return "Invalid YouTube URL. Please enter a valid URL."
    if not RAPIDAPI_KEY:
        return "Error: RAPIDAPI_KEY is not set in the environment variables."

    endpoint = f"https://{RAPIDAPI_HOST}/transcript"
    headers = {
        "X-RapidAPI-Key": RAPIDAPI_KEY,
        "X-RapidAPI-Host": RAPIDAPI_HOST,
    }
    # Let requests build (and percent-encode) the query string.
    query = {"video_id": video_id, "lang": "en"}

    try:
        # Without a timeout a stalled connection would block the UI callback forever.
        response = requests.get(endpoint, headers=headers, params=query, timeout=30)
        response.raise_for_status()  # raises HTTPError on 4xx / 5xx
        transcript_data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        return f"Error fetching transcript: {str(e)}"

    if isinstance(transcript_data, list) and transcript_data:
        # Entries that are not dicts are skipped instead of raising AttributeError.
        transcript_text = " ".join(
            entry.get('text', '') for entry in transcript_data if isinstance(entry, dict)
        )
        return f"Transcript:\n\n{transcript_text}"
    return "No transcript available or unexpected response format."
# Prompt-context assembly.
def analyze_info(category, style, transcripts):
    """Build the text block that lists the chosen category, style and each transcript.

    *transcripts* is split on the ``"\\n\\n---\\n\\n"`` separator; every piece is
    emitted under a numbered heading (starting at 1) followed by a blank line.
    """
    header = f"์ ํํ ์นดํ ๊ณ ๋ฆฌ: {category}\n์ ํํ ํฌ์คํ ์คํ์ผ: {style}\n\n"
    # Collect the sections and join once instead of growing a string with +=.
    sections = [
        f"์ ํ๋ธ ํธ๋์คํฌ๋ฆฝํธ {index}:\n{transcript}\n\n"
        for index, transcript in enumerate(transcripts.split("\n\n---\n\n"), 1)
    ]
    return header + "".join(sections)
# Blog-post generation.
def generate_blog_post(category, style, transcripts, category_prompt, style_prompt, max_tokens, temperature, top_p):
    """Generate a blog post from the transcripts and return it with every newline doubled.

    The category prompt, style prompt and the output of ``analyze_info`` are
    joined with blank lines and sent as the user message; the system message
    is left empty.
    """
    context = analyze_info(category, style, transcripts)
    user_message = "\n\n".join([f"{category_prompt}", f"{style_prompt}", f"{context}"])
    generated = call_api(user_message, "", max_tokens, temperature, top_p)
    # Each single line break becomes a blank line in the returned text.
    return generated.replace('\n', '\n\n')
# Transcript summarisation.
def summarize_transcript(transcripts, system_message, max_tokens, temperature, top_p):
    """Return the model's reply to *transcripts* under *system_message*."""
    return call_api(transcripts, system_message, max_tokens, temperature, top_p)
# Per-category prompt lookup.
def get_blog_post_prompt(category):
    """Return the prompt text for *category*.

    Unknown categories (including ``None``) yield an empty string, so the
    function always returns ``str`` — matching ``get_style_prompt``, which
    also always returns a string — instead of silently falling through to
    ``None``.
    """
    prompts = {
        "์ผ๋ฐํ": "\n#์ ํ๋ธ ๋๋ณธ์ ๋ธ๋ก๊ทธ ํฌ์คํ ์ผ๋ก ๋ณํํ๋ ๊ท์น(์ผ๋ฐํ_v4)\n",
        "์ ๋ณด์ฑ": "\n#์ ํ๋ธ ๋๋ณธ์ ๋ธ๋ก๊ทธ ํฌ์คํ ์ผ๋ก ๋ณํํ๋ ๊ท์น(์ ๋ณด์ฑ_v4)\n",
        "1๊ฐ ์ํ ์ถ์ฒํ": "\n#์ ํ๋ธ ๋๋ณธ์ ๋ธ๋ก๊ทธ ํฌ์คํ ์ผ๋ก ๋ณํํ๋ ๊ท์น(์ถ์ฒํ_v4)\n",
        "ํ๋ ์ด์ ํ": "\n#์ ํ๋ธ ๋๋ณธ์ ๋ธ๋ก๊ทธ ํฌ์คํ ์ผ๋ก ๋ณํํ๋ ๊ท์น(ํ๋ ์ด์ ํ_v3)\n",
    }
    return prompts.get(category, "")
# Per-style prompt lookup.
def get_style_prompt(style):
    """Return the prompt text for *style*, or a placeholder string for unknown styles."""
    fallback = "ํฌ์คํ ์คํ์ผ ํ๋กฌํํธ"
    style_prompts = {
        "์น๊ทผํ": "\n์ ๊ฐ ์๊ฒ ๋ ๊ฟํ๋ค์ ํ๋ํ๋ ์๋ ค๋๋ฆด๊ฒ์.\n",
        "์ผ๋ฐ": "#์ผ๋ฐ์ ์ธ ๋ธ๋ก๊ทธ ํฌ์คํ ์คํ์ผ ๊ฐ์ด๋\n",
        "์ ๋ฌธ์ ์ธ": "\n#์ ๋ฌธ์ ์ธ ๋ธ๋ก๊ทธ ํฌ์คํ ์คํ์ผ ๊ฐ์ด๋\n",
    }
    if style in style_prompts:
        return style_prompts[style]
    return fallback
# Per-style description lookup.
def get_style_description(style):
    """Return the one-line description shown for *style*, or a default hint for unknown styles."""
    by_style = {
        "์น๊ทผํ": "๋ ์์ ๊ฐ๊น์ด ์น๊ตฌ์ฒ๋ผ ๋ํํ๋ ๋ฏํ ์น๊ทผํ ์คํ์ผ์ ๋๋ค.",
        "์ผ๋ฐ": "์ผ๋ฐ์ ์ด๊ณ ์ค๋ฆฝ์ ์ธ ํค์ผ๋ก ์ ๋ณด๋ฅผ ์ ๋ฌํ๋ ์คํ์ผ์ ๋๋ค.",
        "์ ๋ฌธ์ ์ธ": "์ ๋ฌธ๊ฐ์ ์๊ฐ์์ ๊น์ด ์๋ ์ ๋ณด๋ฅผ ์ ๋ฌํ๋ ์คํ์ผ์ ๋๋ค.",
    }
    try:
        return by_style[style]
    except KeyError:
        return "ํฌ์คํ ์คํ์ผ์ ์ ํํ์ธ์."
# Prompt refresh helper.
def update_prompts_and_description(category, style):
    """Return ``(category prompt, style prompt, style description)`` for the given selection."""
    return (
        get_blog_post_prompt(category),
        get_style_prompt(style),
        get_style_description(style),
    )
def format_filename(text):
    """Turn *text* into a short, filesystem-friendly name fragment.

    Everything except word characters, whitespace and ``-`` is removed, runs of
    whitespace (including newlines and tabs) collapse to a single space, and
    the result is limited to 50 characters. If nothing usable is left — or
    *text* is ``None``/empty — ``"untitled"`` is returned so callers never
    build a filename with an empty stem.
    """
    cleaned = re.sub(r'[^\w\s-]', '', text or '')
    # Newlines/tabs would otherwise survive into the filename.
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    return cleaned[:50].strip() or "untitled"
def extract_first_recommended_title(blog_post):
    """Return the first entry of the recommended-titles section of *blog_post*.

    The section starts at the recommended-titles heading line (optionally
    prefixed with ``#`` marks and suffixed with ``:``) and runs to the next
    ``#`` heading or the end of the text. The first non-empty line of that
    section, with a leading list marker (``1.``, ``-``, ``*`` or a bullet)
    removed, is the title. When there is no section, or the section holds no
    usable line, the default title is returned — never an empty string.

    The outcome is also printed for debugging.
    """
    default_title = "๋ธ๋ก๊ทธ_๊ธ"
    section_match = re.search(
        r'(?:#+\s*)?์ถ์ฒ\s*์ ๋ชฉ:?\s*\n([\s\S]*?)(?=\n(?:#+|$)|$)',
        blog_post or '',
        re.IGNORECASE,
    )
    if section_match:
        for raw_line in section_match.group(1).splitlines():
            if raw_line.lstrip().startswith('#'):
                # Reached the next heading: the section had no title lines.
                break
            title = re.sub(r'^\s*(?:\d+\.|-|\*|\โข)?\s*', '', raw_line).strip()
            if title:
                print(f"Extracted title: {title}")
                return title
    print("No title found")
    return default_title
class PDF(FPDF):
    """FPDF document with the NanumGothic font files registered and a page-number footer."""

    def __init__(self):
        super().__init__()
        # Each family is registered from the "<family>.ttf" file, resolved
        # relative to the working directory.
        for family in (
            "NanumGothic",
            "NanumGothicBold",
            "NanumGothicExtraBold",
            "NanumGothicLight",
        ):
            self.add_font(family, "", f"{family}.ttf")

    def header(self):
        # The page header is intentionally left empty.
        pass

    def footer(self):
        # Centered "Page N" line placed 15 units above the bottom edge.
        self.set_y(-15)
        self.set_font('NanumGothicLight', '', 8)
        page_label = f'Page {self.page_no()}'
        self.cell(0, 10, page_label, 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
def save_to_pdf(summary, blog_post, file_type):
    """Write *summary* and *blog_post* to a PDF file and return its filename.

    The summary goes on the first page and the blog post starts on a new page.
    Blog-post lines are styled by their Markdown prefix: ``##`` (or deeper) is
    a 12pt sub-heading, a single ``#`` is a 14pt heading, blank lines add a
    small gap, and everything else is 11pt body text. The filename is
    ``<YYYYMMDD>_<title>.pdf`` (relative to the working directory), where the
    title comes from ``extract_first_recommended_title``.

    *file_type* is accepted for interface compatibility and is not used.
    """
    pdf = PDF()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Page 1: summary.
    pdf.add_page()
    pdf.set_font("NanumGothicExtraBold", size=16)
    pdf.cell(0, 10, "์์ฝ", new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
    pdf.ln(5)
    pdf.set_font("NanumGothic", size=11)
    pdf.multi_cell(0, 6, summary)

    # Page 2 onwards: blog post.
    pdf.add_page()
    pdf.set_font("NanumGothicExtraBold", size=16)
    pdf.cell(0, 10, "๋ธ๋ก๊ทธ ๊ธ", new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
    pdf.ln(5)

    for line in blog_post.split('\n'):
        if line.strip() == '':
            pdf.ln(3)  # a blank line only adds a small gap
        elif line.startswith('##'):
            # Must be tested before the single-'#' case: every '##' line also
            # starts with '#', so the other order never reaches this branch.
            pdf.set_font("NanumGothicBold", size=12)
            pdf.multi_cell(0, 7, line.lstrip('#').strip())
            pdf.ln(2)
        elif line.startswith('#'):
            pdf.set_font("NanumGothicBold", size=14)
            pdf.multi_cell(0, 8, line.lstrip('#').strip())
            pdf.ln(2)
        else:
            pdf.set_font("NanumGothic", size=11)
            pdf.multi_cell(0, 6, line.strip())
            pdf.ln(1)

    title = extract_first_recommended_title(blog_post)
    today_date = datetime.now().strftime("%Y%m%d")
    filename = f"{today_date}_{format_filename(title)}.pdf"
    print(f"Saving PDF as: {filename}")
    pdf.output(filename)
    return filename
# PDF export callback for the Gradio interface.
def save_content_to_pdf(summary, blog_post):
    """Save the summary and blog post as a PDF and return the generated filename."""
    return save_to_pdf(summary, blog_post, "๋ธ๋ก๊ทธ")
# Gradio interface definition.
# NOTE(review): the nesting below (what sits inside each Row/Accordion) was
# reconstructed from statement order and should be confirmed against the
# deployed layout.
title = "์ ํ๋ธ๋ก ๋ธ๋ก๊ทธ ๊ธ ์์ฑํ๊ธฐ"

with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")

    # Step 1: choose the post category.
    gr.Markdown("### 1๋จ๊ณ: ํฌ์คํ ์นดํ ๊ณ ๋ฆฌ๋ฅผ ์ง์ ํด์ฃผ์ธ์", elem_id="step-title")
    category = gr.Radio(choices=["์ผ๋ฐํ","์ ๋ณด์ฑ", "1๊ฐ ์ํ ์ถ์ฒํ", "ํ๋ ์ด์ ํ"], label="ํฌ์คํ ์นดํ ๊ณ ๋ฆฌ", value="์ผ๋ฐํ")

    # Separator.
    gr.Markdown("---\n\n")

    # Step 2: choose the posting style.
    gr.Markdown("### 2๋จ๊ณ: ํฌ์คํ ์คํ์ผ์ ์ ํํด์ฃผ์ธ์", elem_id="step-title")
    style = gr.Radio(choices=["์น๊ทผํ", "์ผ๋ฐ", "์ ๋ฌธ์ ์ธ"], label="ํฌ์คํ ์คํ์ผ", value="์น๊ทผํ")
    style_description = gr.Markdown(f"{get_style_description('์น๊ทผํ')}", elem_id="style-description")

    # Separator.
    gr.Markdown("---\n\n")

    # Step 3: enter up to three YouTube links.
    gr.Markdown("### 3๋จ๊ณ: ์ ํ๋ธ ๋งํฌ๋ฅผ ์ ๋ ฅํ์ธ์", elem_id="step-title")
    with gr.Row():
        youtube_url1 = gr.Textbox(label="YouTube URL 1", placeholder="์ฒซ ๋ฒ์งธ ์ ํ๋ธ ๋งํฌ๋ฅผ ์ ๋ ฅํ์ธ์")
        youtube_url2 = gr.Textbox(label="YouTube URL 2", placeholder="๋ ๋ฒ์งธ ์ ํ๋ธ ๋งํฌ๋ฅผ ์ ๋ ฅํ์ธ์")
        youtube_url3 = gr.Textbox(label="YouTube URL 3", placeholder="์ธ ๋ฒ์งธ ์ ํ๋ธ ๋งํฌ๋ฅผ ์ ๋ ฅํ์ธ์")

    # Hidden textbox (not shown to the user) holding the comma-joined URLs.
    combined_urls = gr.Textbox(visible=False)
    transcript_output = gr.Textbox(label="์ ํ๋ธ ํธ๋์คํฌ๋ฆฝํธ", lines=10)

    def combine_and_get_transcripts(url1, url2, url3):
        """Return the comma-joined non-blank URLs and their transcripts joined by the '---' separator."""
        # `url and ...` also skips None values instead of crashing on .strip().
        urls = [url for url in (url1, url2, url3) if url and url.strip()]
        combined = ",".join(urls)
        all_transcripts = [get_transcript(url.strip()) for url in urls]
        return combined, "\n\n---\n\n".join(all_transcripts)

    # Refresh the transcripts whenever any URL field changes.
    for url_input in [youtube_url1, youtube_url2, youtube_url3]:
        url_input.change(
            fn=combine_and_get_transcripts,
            inputs=[youtube_url1, youtube_url2, youtube_url3],
            outputs=[combined_urls, transcript_output]
        )

    # Summary generation.
    gr.Markdown("### ์์ฝ๊ธ ์์ฑํ๊ธฐ", elem_id="step-title")
    with gr.Accordion("์์ฝ๊ธ ์ค์ ", open=False):
        summary_system_message = gr.Textbox(
            label="์์ฝ๊ธ ์์คํ ๋ฉ์์ง",
            value="\n#์ ํ๋ธ ๋๋ณธ ์์ฝ ๊ท์น\n",
            lines=15,
            visible=True
        )
        summary_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=7000, value=5000, step=1000)
        summary_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7, step=0.05)
        summary_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    summarize_btn = gr.Button("์์ฝ๊ธ ์์ฑํ๊ธฐ")
    summary_output = gr.Textbox(label="์์ฝ๋ ๊ธ", lines=10)

    def generate_summary(transcript, system_message, max_tokens, temperature, top_p):
        """Button callback: summarise the transcript text with the given settings."""
        return summarize_transcript(transcript, system_message, max_tokens, temperature, top_p)

    summarize_btn.click(
        fn=generate_summary,
        inputs=[transcript_output, summary_system_message, summary_max_tokens, summary_temperature, summary_top_p],
        outputs=[summary_output]
    )

    # Separator.
    gr.Markdown("---\n\n")

    # Step 4: generate the blog post.
    gr.Markdown("### 4๋จ๊ณ: ๊ธ ์์ฑํ๊ธฐ", elem_id="step-title")
    gr.HTML("<span style='color: grey;'>[์์ฑํ๊ธฐ ๋ฒํผ์ ์ ํํด์ฃผ์ธ์]</span>")
    with gr.Accordion("๋ธ๋ก๊ทธ ๊ธ ์ค์ ", open=False):
        blog_system_message = gr.Textbox(label="์นดํ ๊ณ ๋ฆฌ ํ๋กฌํํธ", value=get_blog_post_prompt("์ผ๋ฐํ"), lines=20, visible=True)
        # Hidden field carrying the style prompt; initialised for the default style.
        style_prompt_hidden = gr.Textbox(label="์คํ์ผ ํ๋กฌํํธ", value=get_style_prompt("์น๊ทผํ"), lines=10, visible=False)
        blog_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=12000, value=8000, step=1000)
        blog_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        blog_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)
    generate_btn = gr.Button("๋ธ๋ก๊ทธ ๊ธ ์์ฑํ๊ธฐ")
    blog_output = gr.Textbox(label="์์ฑ๋ ๋ธ๋ก๊ทธ ๊ธ", lines=30)

    def generate_blog_content(category, style, transcripts, category_prompt, style_prompt, max_tokens, temperature, top_p):
        """Button callback: generate the blog post from the current selections."""
        return generate_blog_post(category, style, transcripts, category_prompt, style_prompt, max_tokens, temperature, top_p)

    generate_btn.click(
        fn=generate_blog_content,
        inputs=[category, style, transcript_output, blog_system_message, style_prompt_hidden, blog_max_tokens, blog_temperature, blog_top_p],
        outputs=[blog_output]
    )

    # PDF export.
    save_pdf_btn = gr.Button("PDF๋ก ์ ์ฅํ๊ธฐ")
    pdf_output = gr.File(label="์์ฑ๋ PDF ํ์ผ")
    save_pdf_btn.click(
        fn=save_content_to_pdf,
        inputs=[summary_output, blog_output],
        outputs=[pdf_output]
    )

    # Refresh the prompts and the style description when the category or the
    # style changes. This uses the module-level update_prompts_and_description;
    # the identical second definition that used to live here was removed.
    category.change(fn=update_prompts_and_description, inputs=[category, style], outputs=[blog_system_message, style_prompt_hidden, style_description])
    style.change(fn=update_prompts_and_description, inputs=[category, style], outputs=[blog_system_message, style_prompt_hidden, style_description])

    # CSS for the step titles and the style description. It has to be created
    # inside the Blocks context and before launch(); placed after launch() it
    # never became part of the page.
    gr.HTML("""
<style>
#step-title {
font-size: 1.7em;
font-weight: bold;
}
#style-description {
font-size: 1.2em;
}
</style>
""")

demo.launch()