Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,7 @@ import gradio as gr
|
|
| 12 |
|
| 13 |
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
|
| 14 |
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 15 |
-
max_tokens =
|
| 16 |
|
| 17 |
def clean_text(text):
|
| 18 |
text = re.sub(r'\[speaker_\d+\]', '', text)
|
|
@@ -164,9 +164,9 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
|
|
| 164 |
prompt = f"""
|
| 165 |
Analyze the following text (likely a transcript or document) and:
|
| 166 |
|
| 167 |
-
1. First, identify
|
| 168 |
2. For each segment/topic you identify:
|
| 169 |
-
- Provide a
|
| 170 |
- List 3-5 key concepts discussed in that segment
|
| 171 |
- Write a brief summary of that segment (3-5 sentences)
|
| 172 |
- Create 5 quiz questions based DIRECTLY on the content in that segment
|
|
@@ -215,7 +215,7 @@ def analyze_segment_with_gemini(cluster_text, is_full_text=False):
|
|
| 215 |
else:
|
| 216 |
prompt = f"""
|
| 217 |
Analyze the following text segment and provide:
|
| 218 |
-
1. A
|
| 219 |
2. 3-5 key concepts discussed
|
| 220 |
3. A brief summary (6-7 sentences)
|
| 221 |
4. Create 5 quiz questions based DIRECTLY on the text content (not from your summary)
|
|
@@ -298,7 +298,7 @@ def process_document_with_quiz(text):
|
|
| 298 |
token_count = len(tokenizer.encode(text))
|
| 299 |
print(f"Text contains {token_count} tokens")
|
| 300 |
|
| 301 |
-
if token_count <
|
| 302 |
print("Text is short enough to analyze directly without text segmentation")
|
| 303 |
full_analysis = analyze_segment_with_gemini(text, is_full_text=True)
|
| 304 |
|
|
|
|
| 12 |
|
| 13 |
tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
|
| 14 |
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 15 |
+
max_tokens = 3000
|
| 16 |
|
| 17 |
def clean_text(text):
|
| 18 |
text = re.sub(r'\[speaker_\d+\]', '', text)
|
|
|
|
| 164 |
prompt = f"""
|
| 165 |
Analyze the following text (likely a transcript or document) and:
|
| 166 |
|
| 167 |
+
1. First, do text segmentation and identify DISTINCT key topics within the text
|
| 168 |
2. For each segment/topic you identify:
|
| 169 |
+
- Provide a SPECIFIC and UNIQUE topic name (3-5 words) that clearly differentiates it from other segments
|
| 170 |
- List 3-5 key concepts discussed in that segment
|
| 171 |
- Write a brief summary of that segment (3-5 sentences)
|
| 172 |
- Create 5 quiz questions based DIRECTLY on the content in that segment
|
|
|
|
| 215 |
else:
|
| 216 |
prompt = f"""
|
| 217 |
Analyze the following text segment and provide:
|
| 218 |
+
1. A SPECIFIC and DESCRIPTIVE topic name (3-5 words) that precisely captures the main focus
|
| 219 |
2. 3-5 key concepts discussed
|
| 220 |
3. A brief summary (6-7 sentences)
|
| 221 |
4. Create 5 quiz questions based DIRECTLY on the text content (not from your summary)
|
|
|
|
| 298 |
token_count = len(tokenizer.encode(text))
|
| 299 |
print(f"Text contains {token_count} tokens")
|
| 300 |
|
| 301 |
+
if token_count < 7000:
|
| 302 |
print("Text is short enough to analyze directly without text segmentation")
|
| 303 |
full_analysis = analyze_segment_with_gemini(text, is_full_text=True)
|
| 304 |
|