AIRider's picture
Update app.py
bc45ae2 verified
raw
history blame
14.6 kB
import gradio as gr
import random
import os
import re
import requests
from huggingface_hub import InferenceClient
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
from fpdf import FPDF
from fpdf.enums import XPos, YPos
from datetime import datetime
# RapidAPI settings.
# RAPIDAPI_KEY is read from the environment (None when the variable is unset);
# RAPIDAPI_HOST is the RapidAPI host name of the transcript service.
RAPIDAPI_KEY = os.getenv("RAPIDAPI_KEY")
RAPIDAPI_HOST = "youtube-transcriptor.p.rapidapi.com"
# Client factory
def create_client(model_name):
    """Build a Hugging Face ``InferenceClient`` for ``model_name``.

    The access token is taken from the ``HF_TOKEN`` environment variable
    (``None`` is passed through when it is unset).
    """
    hf_token = os.getenv("HF_TOKEN")
    return InferenceClient(model_name, token=hf_token)


# Module-level client shared by every chat-completion call in this file.
client = create_client("CohereForAI/c4ai-command-r-plus")
# API call helper
def call_api(content, system_message, max_tokens, temperature, top_p):
    """Run one chat completion on the shared ``client`` and return its text.

    Args:
        content: Text sent as the ``user`` message.
        system_message: Text sent as the ``system`` message.
        max_tokens: Forwarded to ``chat_completion``.
        temperature: Forwarded to ``chat_completion``.
        top_p: Forwarded to ``chat_completion``.

    Returns:
        The ``content`` of the first choice's message.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    # A new seed is drawn on every call, so repeated requests are not pinned
    # to one sampling seed.
    seed = random.randint(0, 1000000)
    completion = client.chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        seed=seed,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# YouTube video ID extraction
def get_video_id(youtube_url):
    """Extract the video id from a YouTube URL.

    Recognised forms, tried in this order:
      * a ``v=`` query parameter (``watch?v=ID``, ``...&v=ID``) or a bare
        ``v=ID`` string;
      * ``youtu.be/ID`` short links;
      * ``/shorts/ID``, ``/embed/ID`` and ``/live/ID`` paths.

    The id ends at the first ``#``, ``&``, ``?``, ``/`` or whitespace
    character.

    Args:
        youtube_url: URL text entered by the user; may be empty or ``None``.

    Returns:
        The id as a non-empty string, or ``None`` when no id is found.
    """
    if not youtube_url:
        return None

    id_chars = r"([^#&?/\s]+)"
    patterns = (
        # Anchor on '?', '&' or start-of-string so that parameters merely
        # ending in "v" (e.g. "rev=2") are not mistaken for the video id.
        r"(?:^|[?&])v=" + id_chars,
        # The dot is escaped so only the literal host "youtu.be" matches.
        r"youtu\.be/" + id_chars,
        r"/(?:shorts|embed|live)/" + id_chars,
    )
    for pattern in patterns:
        found = re.search(pattern, youtube_url)
        if found:
            return found.group(1)
    return None
# YouTube transcript retrieval
def get_transcript(youtube_url, timeout=30):
    """Fetch the English transcript of a YouTube video through RapidAPI.

    Failures are reported in the returned text rather than raised, so the
    result can be shown directly in the UI.

    Args:
        youtube_url: URL of the video.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``"Transcript:\\n\\n<text>"`` on success; otherwise a human-readable
        message describing an invalid URL, a missing ``RAPIDAPI_KEY``, a
        request/decoding error, or an empty/unexpected response.
    """
    video_id = get_video_id(youtube_url)
    if not video_id:
        return "Invalid YouTube URL. Please enter a valid URL."
    if not RAPIDAPI_KEY:
        return "Error: RAPIDAPI_KEY is not set in the environment variables."

    headers = {
        "X-RapidAPI-Key": RAPIDAPI_KEY,
        "X-RapidAPI-Host": RAPIDAPI_HOST
    }
    try:
        # `params` lets requests URL-encode the values; `timeout` keeps a
        # stalled connection from hanging the UI callback indefinitely.
        response = requests.get(
            f"https://{RAPIDAPI_HOST}/transcript",
            params={"video_id": video_id, "lang": "en"},
            headers=headers,
            timeout=timeout,
        )
        response.raise_for_status()  # Raises an HTTPError for 4xx/5xx statuses
        transcript_data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        return f"Error fetching transcript: {str(e)}"

    if isinstance(transcript_data, list) and transcript_data:
        # NOTE(review): assumes each list item is a dict carrying a 'text'
        # field — confirm against the service's actual response schema.
        transcript_text = " ".join(
            entry.get('text', '')
            for entry in transcript_data
            if isinstance(entry, dict)
        )
        return f"Transcript:\n\n{transcript_text}"
    return "No transcript available or unexpected response format."
# Information analysis
def analyze_info(category, style, transcripts):
    """Assemble the text block describing the selection and transcripts.

    ``transcripts`` is split on the ``"\\n\\n---\\n\\n"`` separator; each
    piece becomes a numbered section (starting at 1) after a two-line header
    naming ``category`` and ``style``.

    Returns:
        The header followed by all numbered transcript sections.
    """
    header = f"์„ ํƒํ•œ ์นดํ…Œ๊ณ ๋ฆฌ: {category}\n์„ ํƒํ•œ ํฌ์ŠคํŒ… ์Šคํƒ€์ผ: {style}\n\n"
    sections = [
        f"์œ ํŠœ๋ธŒ ํŠธ๋žœ์Šคํฌ๋ฆฝํŠธ {index}:\n{text}\n\n"
        for index, text in enumerate(transcripts.split("\n\n---\n\n"), start=1)
    ]
    return header + "".join(sections)
# Blog post generation
def generate_blog_post(category, style, transcripts, category_prompt, style_prompt, max_tokens, temperature, top_p):
    """Generate a blog post from the transcripts with the given prompts.

    The category prompt, style prompt and the output of ``analyze_info`` are
    joined with blank lines into a single user message; the system message is
    passed as an empty string.

    Returns:
        The model's reply with every newline doubled.
    """
    analyzed = analyze_info(category, style, transcripts)
    prompt = f"{category_prompt}\n\n{style_prompt}\n\n{analyzed}"
    draft = call_api(prompt, "", max_tokens, temperature, top_p)
    # Splitting on '\n' and re-joining with '\n\n' doubles each line break.
    return '\n\n'.join(draft.split('\n'))
# YouTube script summarization
def summarize_transcript(transcripts, system_message, max_tokens, temperature, top_p):
    """Summarize ``transcripts`` by sending them to ``call_api``.

    ``transcripts`` is the user message and ``system_message`` the system
    message; the sampling arguments are forwarded unchanged.

    Returns:
        The text returned by ``call_api``.
    """
    return call_api(
        transcripts,
        system_message,
        max_tokens,
        temperature,
        top_p,
    )
# Per-category prompt lookup
def get_blog_post_prompt(category):
    """Return the prompt text registered for ``category``.

    Returns:
        The prompt string for one of the four known categories, or ``None``
        when ``category`` is not one of them.
    """
    # NOTE(review): keys and prompt texts are kept exactly as they appear in
    # this file (they look mis-encoded); they must keep matching the choices
    # offered by the category selector.
    prompts_by_category = {
        "์ผ๋ฐ˜ํ˜•": "\n#์œ ํŠœ๋ธŒ ๋Œ€๋ณธ์„ ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŒ…์œผ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ๊ทœ์น™(์ผ๋ฐ˜ํ˜•_v4)\n",
        "์ •๋ณด์„ฑ": "\n#์œ ํŠœ๋ธŒ ๋Œ€๋ณธ์„ ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŒ…์œผ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ๊ทœ์น™(์ •๋ณด์„ฑ_v4)\n",
        "1๊ฐœ ์ƒํ’ˆ ์ถ”์ฒœํ˜•": "\n#์œ ํŠœ๋ธŒ ๋Œ€๋ณธ์„ ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŒ…์œผ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ๊ทœ์น™(์ถ”์ฒœํ˜•_v4)\n",
        "ํ๋ ˆ์ด์…˜ํ˜•": "\n#์œ ํŠœ๋ธŒ ๋Œ€๋ณธ์„ ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŒ…์œผ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ๊ทœ์น™(ํ๋ ˆ์ด์…˜ํ˜•_v3)\n",
    }
    return prompts_by_category.get(category)
# Posting-style prompt lookup
def get_style_prompt(style):
    """Return the prompt text registered for the posting ``style``.

    Returns:
        The prompt for one of the three known styles, or a fixed placeholder
        string when ``style`` is not one of them.
    """
    fallback = "ํฌ์ŠคํŒ… ์Šคํƒ€์ผ ํ”„๋กฌํ”„ํŠธ"
    # Texts are reproduced exactly as stored in this file, including the
    # leading/trailing newlines (the second entry has no leading newline).
    prompts_by_style = {
        "์นœ๊ทผํ•œ": "\n์ œ๊ฐ€ ์•Œ๊ฒŒ ๋œ ๊ฟ€ํŒ๋“ค์„ ํ•˜๋‚˜ํ•˜๋‚˜ ์•Œ๋ ค๋“œ๋ฆด๊ฒŒ์š”.\n",
        "์ผ๋ฐ˜": "#์ผ๋ฐ˜์ ์ธ ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŒ… ์Šคํƒ€์ผ ๊ฐ€์ด๋“œ\n",
        "์ „๋ฌธ์ ์ธ": "\n#์ „๋ฌธ์ ์ธ ๋ธ”๋กœ๊ทธ ํฌ์ŠคํŒ… ์Šคํƒ€์ผ ๊ฐ€์ด๋“œ\n",
    }
    try:
        return prompts_by_style[style]
    except KeyError:
        return fallback
# Posting-style description lookup
def get_style_description(style):
    """Return the one-line description shown for the posting ``style``.

    Returns:
        The description for one of the three known styles, or a fixed
        fallback message when ``style`` is not one of them.
    """
    fallback = "ํฌ์ŠคํŒ… ์Šคํƒ€์ผ์„ ์„ ํƒํ•˜์„ธ์š”."
    descriptions_by_style = {
        "์นœ๊ทผํ•œ": "๋…์ž์™€ ๊ฐ€๊นŒ์šด ์นœ๊ตฌ์ฒ˜๋Ÿผ ๋Œ€ํ™”ํ•˜๋Š” ๋“ฏํ•œ ์นœ๊ทผํ•œ ์Šคํƒ€์ผ์ž…๋‹ˆ๋‹ค.",
        "์ผ๋ฐ˜": "์ผ๋ฐ˜์ ์ด๊ณ  ์ค‘๋ฆฝ์ ์ธ ํ†ค์œผ๋กœ ์ •๋ณด๋ฅผ ์ „๋‹ฌํ•˜๋Š” ์Šคํƒ€์ผ์ž…๋‹ˆ๋‹ค.",
        "์ „๋ฌธ์ ์ธ": "์ „๋ฌธ๊ฐ€์˜ ์‹œ๊ฐ์—์„œ ๊นŠ์ด ์žˆ๋Š” ์ •๋ณด๋ฅผ ์ „๋‹ฌํ•˜๋Š” ์Šคํƒ€์ผ์ž…๋‹ˆ๋‹ค.",
    }
    if style in descriptions_by_style:
        return descriptions_by_style[style]
    return fallback
# Prompt/description refresh
def update_prompts_and_description(category, style):
    """Look up the texts that depend on the selected category and style.

    Returns:
        A 3-tuple ``(category prompt, style prompt, style description)``
        obtained from ``get_blog_post_prompt``, ``get_style_prompt`` and
        ``get_style_description`` respectively.
    """
    return (
        get_blog_post_prompt(category),
        get_style_prompt(style),
        get_style_description(style),
    )
def format_filename(text):
    """Reduce ``text`` to a short string usable inside a file name.

    Every character that is not a word character, whitespace or ``-`` is
    removed; the result is cut to its first 50 characters and then stripped
    of surrounding whitespace.
    """
    disallowed = re.compile(r'[^\w\s-]')
    cleaned = disallowed.sub('', text)
    # Truncate first, then strip, so whitespace left at the cut is removed.
    truncated = cleaned[:50]
    return truncated.strip()
def extract_first_recommended_title(blog_post):
    """Pull the first entry of the post's recommended-titles section.

    The section is located with a case-insensitive regex (an optional ``#``
    heading marker followed by the section label and a newline); its first
    line, minus an optional list marker (``1.``, ``-``, ``*`` or a bullet),
    is returned stripped. The outcome is printed for debugging.

    Returns:
        The extracted title, or a fixed default name when the section is not
        present.
    """
    default_title = "๋ธ”๋กœ๊ทธ_๊ธ€"

    section = re.search(
        r'(?:#+\s*)?์ถ”์ฒœ\s*์ œ๋ชฉ:?\s*\n([\s\S]*?)(?=\n(?:#+|$)|$)',
        blog_post,
        re.IGNORECASE,
    )
    if section is None:
        print("No title found")
        return default_title

    first_entry = re.search(
        r'(?:^|\n)\s*(?:\d+\.|-|\*|\โ€ข)?\s*(.*?)(?=\n|$)',
        section.group(1),
    )
    if first_entry is None:
        print("No title found")
        return default_title

    title = first_entry.group(1).strip()
    print(f"Extracted title: {title}")
    return title
class PDF(FPDF):
    """FPDF document with the NanumGothic faces registered and a page-number footer."""

    def __init__(self):
        """Initialise the document and register the four NanumGothic faces.

        Each face is added under its own family name (regular style) from a
        ``.ttf`` file of the same name.
        """
        super().__init__()
        font_families = (
            "NanumGothic",
            "NanumGothicBold",
            "NanumGothicExtraBold",
            "NanumGothicLight",
        )
        for family in font_families:
            self.add_font(family, "", f"{family}.ttf")

    def header(self):
        """Draw nothing: the page header is left empty."""

    def footer(self):
        """Write a centred ``Page N`` label in the light face at size 8."""
        self.set_y(-15)
        self.set_font('NanumGothicLight', '', 8)
        page_label = f'Page {self.page_no()}'
        self.cell(0, 10, page_label, 0, new_x=XPos.RIGHT, new_y=YPos.TOP, align='C')
def save_to_pdf(summary, blog_post, file_type):
    """Render the summary and the blog post into a PDF file.

    The summary goes on the first page under its own heading; the blog post
    starts on a new page. Blog-post lines are styled by prefix: ``##`` (or
    deeper) as a 12pt subheading, a single ``#`` as a 14pt heading, blank
    lines as a small gap, everything else as 11pt body text.

    Args:
        summary: Summary text for the first page.
        blog_post: Blog post text, one logical line per ``\\n``.
        file_type: Accepted for interface compatibility; not used.

    Returns:
        The name of the written file,
        ``<YYYYMMDD>_<sanitised first recommended title>.pdf``.
    """
    pdf = PDF()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Summary page.
    pdf.add_page()
    pdf.set_font("NanumGothicExtraBold", size=16)
    pdf.cell(0, 10, "์š”์•ฝ", new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
    pdf.ln(5)
    pdf.set_font("NanumGothic", size=11)
    pdf.multi_cell(0, 6, summary)

    # Blog post page(s).
    pdf.add_page()
    pdf.set_font("NanumGothicExtraBold", size=16)
    pdf.cell(0, 10, "๋ธ”๋กœ๊ทธ ๊ธ€", new_x=XPos.LMARGIN, new_y=YPos.NEXT, align='C')
    pdf.ln(5)

    for line in blog_post.split('\n'):
        if line.strip() == '':
            pdf.ln(3)  # Blank line: add only a small gap
        elif line.startswith('##'):  # Subheading
            # Must be tested before the single-'#' case: every '##' line also
            # starts with '#', so the opposite order makes this branch
            # unreachable.
            pdf.set_font("NanumGothicBold", size=12)
            pdf.multi_cell(0, 7, line.lstrip('#').strip())
            pdf.ln(2)
        elif line.startswith('#'):  # Heading
            pdf.set_font("NanumGothicBold", size=14)
            pdf.multi_cell(0, 8, line.lstrip('#').strip())
            pdf.ln(2)
        else:
            pdf.set_font("NanumGothic", size=11)
            pdf.multi_cell(0, 6, line.strip())
            pdf.ln(1)

    title = extract_first_recommended_title(blog_post)
    today_date = datetime.now().strftime("%Y%m%d")
    filename = f"{today_date}_{format_filename(title)}.pdf"
    print(f"Saving PDF as: {filename}")
    pdf.output(filename)
    return filename
# PDF export callback for the Gradio interface
def save_content_to_pdf(summary, blog_post):
    """Write the summary and blog post to a PDF and return the file name.

    Delegates to ``save_to_pdf`` with a fixed ``file_type`` label.
    """
    return save_to_pdf(summary, blog_post, "๋ธ”๋กœ๊ทธ")
# Gradio interface layout
# Page title shown at the top of the interface.
title = "์œ ํŠœ๋ธŒ๋กœ ๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑํ•˜๊ธฐ"

# NOTE(review): the source indentation was lost, so the nesting of components
# inside the Row/Accordion containers below is reconstructed — confirm it
# against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")

    # Step 1: choose the posting category
    gr.Markdown("### 1๋‹จ๊ณ„: ํฌ์ŠคํŒ… ์นดํ…Œ๊ณ ๋ฆฌ๋ฅผ ์ง€์ •ํ•ด์ฃผ์„ธ์š”", elem_id="step-title")
    category = gr.Radio(choices=["์ผ๋ฐ˜ํ˜•","์ •๋ณด์„ฑ", "1๊ฐœ ์ƒํ’ˆ ์ถ”์ฒœํ˜•", "ํ๋ ˆ์ด์…˜ํ˜•"], label="ํฌ์ŠคํŒ… ์นดํ…Œ๊ณ ๋ฆฌ", value="์ผ๋ฐ˜ํ˜•")

    # Divider
    gr.Markdown("---\n\n")

    # Step 2: choose the posting style
    gr.Markdown("### 2๋‹จ๊ณ„: ํฌ์ŠคํŒ… ์Šคํƒ€์ผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”", elem_id="step-title")
    style = gr.Radio(choices=["์นœ๊ทผํ•œ", "์ผ๋ฐ˜", "์ „๋ฌธ์ ์ธ"], label="ํฌ์ŠคํŒ… ์Šคํƒ€์ผ", value="์นœ๊ทผํ•œ")
    style_description = gr.Markdown(f"{get_style_description('์นœ๊ทผํ•œ')}", elem_id="style-description")

    # Divider
    gr.Markdown("---\n\n")

    # Step 3: enter the YouTube links
    gr.Markdown("### 3๋‹จ๊ณ„: ์œ ํŠœ๋ธŒ ๋งํฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”", elem_id="step-title")
    with gr.Row():
        youtube_url1 = gr.Textbox(label="YouTube URL 1", placeholder="์ฒซ ๋ฒˆ์งธ ์œ ํŠœ๋ธŒ ๋งํฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”")
        youtube_url2 = gr.Textbox(label="YouTube URL 2", placeholder="๋‘ ๋ฒˆ์งธ ์œ ํŠœ๋ธŒ ๋งํฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”")
        youtube_url3 = gr.Textbox(label="YouTube URL 3", placeholder="์„ธ ๋ฒˆ์งธ ์œ ํŠœ๋ธŒ ๋งํฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”")

    # Hidden textbox (not shown to the user) holding the comma-joined URLs
    combined_urls = gr.Textbox(visible=False)
    transcript_output = gr.Textbox(label="์œ ํŠœ๋ธŒ ํŠธ๋žœ์Šคํฌ๋ฆฝํŠธ", lines=10)

    def combine_and_get_transcripts(url1, url2, url3):
        """Fetch the transcript of every non-blank URL.

        Returns:
            ``(comma-joined URLs, transcripts joined by "\\n\\n---\\n\\n")``.
        """
        # `url and ...` also skips None values, which have no .strip().
        urls = [url for url in (url1, url2, url3) if url and url.strip()]
        combined = ",".join(urls)
        all_transcripts = [get_transcript(url.strip()) for url in urls]
        return combined, "\n\n---\n\n".join(all_transcripts)

    # Refresh the transcripts whenever any URL box changes.
    # NOTE(review): every change event re-fetches all transcripts; consider a
    # submit/blur trigger if the transcript API is rate-limited.
    for url_input in [youtube_url1, youtube_url2, youtube_url3]:
        url_input.change(
            fn=combine_and_get_transcripts,
            inputs=[youtube_url1, youtube_url2, youtube_url3],
            outputs=[combined_urls, transcript_output]
        )

    # Summary generation
    gr.Markdown("### ์š”์•ฝ๊ธ€ ์ƒ์„ฑํ•˜๊ธฐ", elem_id="step-title")
    with gr.Accordion("์š”์•ฝ๊ธ€ ์„ค์ •", open=False):
        summary_system_message = gr.Textbox(
            label="์š”์•ฝ๊ธ€ ์‹œ์Šคํ…œ ๋ฉ”์‹œ์ง€",
            value="\n#์œ ํŠœ๋ธŒ ๋Œ€๋ณธ ์š”์•ฝ ๊ทœ์น™\n",
            lines=15,
            visible=True
        )
        summary_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=7000, value=5000, step=1000)
        summary_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.7, step=0.05)
        summary_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)

    summarize_btn = gr.Button("์š”์•ฝ๊ธ€ ์ƒ์„ฑํ•˜๊ธฐ")
    summary_output = gr.Textbox(label="์š”์•ฝ๋œ ๊ธ€", lines=10)

    def generate_summary(transcript, system_message, max_tokens, temperature, top_p):
        """Click handler: summarize the fetched transcript text."""
        summary = summarize_transcript(transcript, system_message, max_tokens, temperature, top_p)
        return summary

    summarize_btn.click(
        fn=generate_summary,
        inputs=[transcript_output, summary_system_message, summary_max_tokens, summary_temperature, summary_top_p],
        outputs=[summary_output]
    )

    # Divider
    gr.Markdown("---\n\n")

    # Step 4: generate the post
    gr.Markdown("### 4๋‹จ๊ณ„: ๊ธ€ ์ƒ์„ฑํ•˜๊ธฐ", elem_id="step-title")
    gr.HTML("<span style='color: grey;'>[์ƒ์„ฑํ•˜๊ธฐ ๋ฒ„ํŠผ์„ ์„ ํƒํ•ด์ฃผ์„ธ์š”]</span>")
    with gr.Accordion("๋ธ”๋กœ๊ทธ ๊ธ€ ์„ค์ •", open=False):
        blog_system_message = gr.Textbox(label="์นดํ…Œ๊ณ ๋ฆฌ ํ”„๋กฌํ”„ํŠธ", value=get_blog_post_prompt("์ผ๋ฐ˜ํ˜•"), lines=20, visible=True)
        # Hidden; initialised to the prompt of the default style.
        style_prompt_hidden = gr.Textbox(label="์Šคํƒ€์ผ ํ”„๋กฌํ”„ํŠธ", value=get_style_prompt("์นœ๊ทผํ•œ"), lines=10, visible=False)
        blog_max_tokens = gr.Slider(label="Max Tokens", minimum=1000, maximum=12000, value=8000, step=1000)
        blog_temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, value=0.8, step=0.1)
        blog_top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, value=0.95, step=0.05)

    generate_btn = gr.Button("๋ธ”๋กœ๊ทธ ๊ธ€ ์ƒ์„ฑํ•˜๊ธฐ")
    blog_output = gr.Textbox(label="์ƒ์„ฑ๋œ ๋ธ”๋กœ๊ทธ ๊ธ€", lines=30)

    def generate_blog_content(category, style, transcripts, category_prompt, style_prompt, max_tokens, temperature, top_p):
        """Click handler: generate the blog post from the current inputs."""
        blog_post = generate_blog_post(category, style, transcripts, category_prompt, style_prompt, max_tokens, temperature, top_p)
        return blog_post

    generate_btn.click(
        fn=generate_blog_content,
        inputs=[category, style, transcript_output, blog_system_message, style_prompt_hidden, blog_max_tokens, blog_temperature, blog_top_p],
        outputs=[blog_output]
    )

    # PDF export button
    save_pdf_btn = gr.Button("PDF๋กœ ์ €์žฅํ•˜๊ธฐ")
    pdf_output = gr.File(label="์ƒ์„ฑ๋œ PDF ํŒŒ์ผ")
    save_pdf_btn.click(
        fn=save_content_to_pdf,
        inputs=[summary_output, blog_output],
        outputs=[pdf_output]
    )

    # Refresh the prompts and the style description when the category or the
    # style changes. The handler is the module-level
    # update_prompts_and_description defined earlier in this file; it is not
    # redefined here.
    category.change(fn=update_prompts_and_description, inputs=[category, style], outputs=[blog_system_message, style_prompt_hidden, style_description])
    style.change(fn=update_prompts_and_description, inputs=[category, style], outputs=[blog_system_message, style_prompt_hidden, style_description])
# Add the CSS rules for the #step-title and #style-description elements.
# The component has to be created inside the Blocks context and before
# launch(): a component created after launch(), outside any context, is never
# part of the served page.
with demo:
    gr.HTML("""
<style>
#step-title {
font-size: 1.7em;
font-weight: bold;
}
#style-description {
font-size: 1.2em;
}
</style>
""")

demo.launch()