Sa-m committed
Commit 70bcfc2 · verified · 1 Parent(s): f9128f8

Update app.py

Files changed (1)
  1. app.py +137 -124
app.py CHANGED
@@ -357,8 +357,8 @@ def evaluate_text(raw_input: str, model_provider: LLMProvider, prompt_template:
         "interpretation": interpretation
     }
 
-def process_input(input_text: str, file_upload, model_choice: str, prompt_choice: str) -> Tuple[str, List[List[str]], str]:
-    """Process either input text or uploaded file"""
     if input_text and file_upload:
         return "Please use either text input or file upload, not both.", [], ""
 
@@ -382,147 +382,151 @@ def process_input(input_text: str, file_upload, model_choice: str, prompt_choice
 
     # Process single text input
     if input_text:
-        with gr.Progress() as progress:
-            progress(0.1, desc="Starting evaluation...")
-            time.sleep(0.2)
-
-            progress(0.3, desc="Generating rewritten content...")
-            time.sleep(0.2)
-
-            progress(0.6, desc="Calculating metrics...")
-            result = evaluate_text(input_text, model_provider, prompt_template)
-
-            progress(0.9, desc="Finalizing results...")
-            time.sleep(0.2)
-
-            # Format metrics for display
-            metrics_table = [
-                ["Metric", "Raw Score", "Normalized"],
-                ["AnswerRelevancy", f"{result['metrics']['AnswerRelevancy']:.4f}", f"{result['normalized']['AnswerRelevancy']:.4f}"],
-                ["Faithfulness", f"{result['metrics']['Faithfulness']:.4f}", f"{result['normalized']['Faithfulness']:.4f}"],
-                ["GEval", f"{result['metrics']['GEval']:.4f}", f"{result['normalized']['GEval']:.4f}"],
-                ["BERTScore", f"{result['metrics']['BERTScore']:.4f}", f"{result['normalized']['BERTScore']:.4f}"],
-                ["ROUGE", f"{result['metrics']['ROUGE']:.4f}", f"{result['normalized']['ROUGE']:.4f}"],
-                ["BLEU", f"{result['metrics']['BLEU']:.4f}", f"{result['normalized']['BLEU']:.4f}"],
-                ["METEOR", f"{result['metrics']['METEOR']:.4f}", f"{result['normalized']['METEOR']:.4f}"],
-                ["Weighted Score", f"{result['weighted_score']:.4f}", "N/A"]
-            ]
-
-            return (
-                result["candidate"],
-                metrics_table,
-                f"Hybrid Score: {result['weighted_score']:.4f} - {result['interpretation']}"
-            )
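The table above pairs each raw metric with a normalized value and folds them into a single weighted "hybrid" score. Purely as an illustration of that shape (the actual weights and normalization live elsewhere in app.py and are not part of this diff), such a score could be combined like so:

    def hybrid_score(normalized: dict[str, float]) -> float:
        # Hypothetical weights for illustration only; the real ones are defined in app.py.
        weights = {"AnswerRelevancy": 0.20, "Faithfulness": 0.20, "GEval": 0.20,
                   "BERTScore": 0.15, "ROUGE": 0.10, "BLEU": 0.05, "METEOR": 0.10}
        return sum(w * normalized[name] for name, w in weights.items())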
 
     # Process file upload
     if file_upload:
-        with gr.Progress() as progress:
-            progress(0.1, desc="Reading file...")
             time.sleep(0.2)
 
-            # Read the file (assuming CSV with one column of text)
             try:
-                df = pd.read_csv(file_upload.name)
-                progress(0.3, desc="Processing entries...")
-                time.sleep(0.2)
             except Exception as e:
-                return f"Error reading file: {str(e)}", [], ""
-
-            # Assuming the first column contains the text
-            text_column = df.columns[0]
-            results = []
-            detailed_results = []
 
-            # Process each entry with progress updates
-            for i, row in df.iterrows():
-                progress((i + 1) / len(df) * 0.6 + 0.3, desc=f"Processing entry {i+1}/{len(df)}")
-                text = str(row[text_column])
-
-                try:
-                    result = evaluate_text(text, model_provider, prompt_template)
-
-                    # Add to results
-                    results.append(result["weighted_score"])
-
-                    # Store detailed results
-                    detailed_results.append({
-                        "input_preview": text[:100] + "..." if len(text) > 100 else text,
-                        "weighted_score": result["weighted_score"],
-                        "interpretation": result["interpretation"],
-                        "candidate": result["candidate"]
-                    })
-                except Exception as e:
-                    print(f"Error processing entry {i}: {str(e)}")
-                    results.append(0.0)
-                    detailed_results.append({
-                        "input_preview": text[:100] + "..." if len(text) > 100 else text,
-                        "weighted_score": 0.0,
-                        "interpretation": "Error processing this entry",
-                        "candidate": ""
-                    })
 
-            progress(0.9, desc="Generating summary...")
-            time.sleep(0.2)
 
-            # Create results dataframe
-            results_df = pd.DataFrame(detailed_results)
 
-            # Generate summary statistics
-            valid_scores = [s for s in results if s > 0]
-            if valid_scores:
-                avg_score = sum(valid_scores) / len(valid_scores)
-                min_score = min(valid_scores)
-                max_score = max(valid_scores)
-
-                if avg_score >= 0.85:
-                    summary = "Excellent performance across inputs"
-                elif avg_score >= 0.70:
-                    summary = "Good performance with room for minor improvements"
-                elif avg_score >= 0.50:
-                    summary = "Adequate performance but needs refinement"
-                else:
-                    summary = "Significant improvements needed"
-
-                # Format summary
-                summary_text = (
-                    f"Processed {len(results)} entries ({len(valid_scores)} successful)\n"
-                    f"Average Hybrid Score: {avg_score:.4f}\n"
-                    f"Range: {min_score:.4f} - {max_score:.4f}\n\n"
-                    f"{summary}"
-                )
-
-                # Create metrics table for summary
-                metrics_table = [
-                    ["Metric", "Value"],
-                    ["Entries Processed", f"{len(results)}"],
-                    ["Successful Entries", f"{len(valid_scores)}"],
-                    ["Average Score", f"{avg_score:.4f}"],
-                    ["Best Score", f"{max_score:.4f}"],
-                    ["Worst Score", f"{min_score:.4f}"],
-                    ["Overall Assessment", summary]
-                ]
-
-                return (
-                    "Batch processing complete. Use the 'Show Details' button to see individual results.",
-                    metrics_table,
-                    summary_text
-                )
-            else:
-                return (
-                    "No successful evaluations. Check your API configuration and input data.",
-                    [["Error", "All evaluations failed"]],
-                    "Error: No successful evaluations. Check your API configuration and input data."
-                )
 
-def show_detailed_results(input_text, file_upload, model_choice, prompt_choice):
     """Show detailed results for batch processing"""
     if not file_upload:
         return "No file uploaded for batch processing."
 
     # Read the file
     df = pd.read_csv(file_upload.name)
     text_column = df.columns[0]
 
     # Determine model provider
     if model_choice == "Gemini":
         model_provider = GeminiProvider("gemini-1.5-flash-latest")
@@ -531,12 +535,19 @@ def show_detailed_results(input_text, file_upload, model_choice, prompt_choice):
     else: # Llama-3-8b
         model_provider = GroqProvider("llama3-8b-8192")
 
     # Get prompt template
     prompt_template = PROMPT_TEMPLATES[prompt_choice]
 
     # Process each entry
     results = []
-    for _, row in df.iterrows():
         text = str(row[text_column])
         try:
             result = evaluate_text(text, model_provider, prompt_template)
@@ -553,7 +564,9 @@ def show_detailed_results(input_text, file_upload, model_choice, prompt_choice):
                 "Interpretation": "Processing error",
                 "Candidate Text": ""
             })
 
     return gr.Dataframe(value=pd.DataFrame(results))
 
 # Create Gradio interface
         "interpretation": interpretation
     }
 
+def process_input(input_text: str, file_upload, model_choice: str, prompt_choice: str, progress=gr.Progress()) -> Tuple[str, List[List[str]], str]:
+    """Process either input text or uploaded file with progress tracking"""
     if input_text and file_upload:
         return "Please use either text input or file upload, not both.", [], ""
 
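The new signature above replaces the old `with gr.Progress() as progress:` block with Gradio's documented pattern: declare `progress=gr.Progress()` as a default argument, and Gradio injects a live tracker when the function runs as an event handler. A minimal, self-contained sketch of that pattern (illustrative only, not code from this repo):

    import time
    import gradio as gr

    def slow_task(steps: float, progress=gr.Progress()):
        # Gradio supplies `progress` automatically when this runs as an event handler;
        # progress(fraction, desc=...) and progress.tqdm(iterable) both update the UI.
        for _ in progress.tqdm(range(int(steps)), desc="Working..."):
            time.sleep(0.1)
        return f"Finished {int(steps)} steps"

    demo = gr.Interface(slow_task, gr.Number(value=10), gr.Textbox())
    # demo.launch()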
 
     # Process single text input
     if input_text:
+        progress(0.1, desc="Starting evaluation...")
+        time.sleep(0.2)
+
+        progress(0.3, desc="Generating rewritten content...")
+        time.sleep(0.2)
+
+        progress(0.6, desc="Calculating metrics...")
+        result = evaluate_text(input_text, model_provider, prompt_template)
+
+        progress(0.9, desc="Finalizing results...")
+        time.sleep(0.2)
+
+        # Format metrics for display
+        metrics_table = [
+            ["Metric", "Raw Score", "Normalized"],
+            ["AnswerRelevancy", f"{result['metrics']['AnswerRelevancy']:.4f}", f"{result['normalized']['AnswerRelevancy']:.4f}"],
+            ["Faithfulness", f"{result['metrics']['Faithfulness']:.4f}", f"{result['normalized']['Faithfulness']:.4f}"],
+            ["GEval", f"{result['metrics']['GEval']:.4f}", f"{result['normalized']['GEval']:.4f}"],
+            ["BERTScore", f"{result['metrics']['BERTScore']:.4f}", f"{result['normalized']['BERTScore']:.4f}"],
+            ["ROUGE", f"{result['metrics']['ROUGE']:.4f}", f"{result['normalized']['ROUGE']:.4f}"],
+            ["BLEU", f"{result['metrics']['BLEU']:.4f}", f"{result['normalized']['BLEU']:.4f}"],
+            ["METEOR", f"{result['metrics']['METEOR']:.4f}", f"{result['normalized']['METEOR']:.4f}"],
+            ["Weighted Score", f"{result['weighted_score']:.4f}", "N/A"]
+        ]
+
+        return (
+            result["candidate"],
+            metrics_table,
+            f"Hybrid Score: {result['weighted_score']:.4f} - {result['interpretation']}"
+        )
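A side note on the table returned above: the header row ["Metric", "Raw Score", "Normalized"] is embedded as the first data row. If the metrics output component happens to be a gr.Dataframe, an alternative (purely illustrative, not what this commit does) is to declare the headers on the component and return only data rows:

    import gradio as gr

    # Hypothetical component; the real component names in app.py are not shown in this diff.
    metrics_output = gr.Dataframe(headers=["Metric", "Raw Score", "Normalized"])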
 
     # Process file upload
     if file_upload:
+        progress(0.1, desc="Reading file...")
+        time.sleep(0.2)
+
+        # Read the file (assuming CSV with one column of text)
+        try:
+            df = pd.read_csv(file_upload.name)
+            progress(0.3, desc="Processing entries...")
             time.sleep(0.2)
+        except Exception as e:
+            return f"Error reading file: {str(e)}", [], ""
+
+        # Assuming the first column contains the text
+        text_column = df.columns[0]
+        results = []
+        detailed_results = []
+
+        # Process each entry with progress updates
+        for i, row in df.iterrows():
+            progress((i + 1) / len(df) * 0.6 + 0.3, desc=f"Processing entry {i+1}/{len(df)}")
+            text = str(row[text_column])
 
             try:
+                result = evaluate_text(text, model_provider, prompt_template)
+
+                # Add to results
+                results.append(result["weighted_score"])
+
+                # Store detailed results
+                detailed_results.append({
+                    "input_preview": text[:100] + "..." if len(text) > 100 else text,
+                    "weighted_score": result["weighted_score"],
+                    "interpretation": result["interpretation"],
+                    "candidate": result["candidate"]
+                })
             except Exception as e:
+                print(f"Error processing entry {i}: {str(e)}")
+                results.append(0.0)
+                detailed_results.append({
+                    "input_preview": text[:100] + "..." if len(text) > 100 else text,
+                    "weighted_score": 0.0,
+                    "interpretation": "Error processing this entry",
+                    "candidate": ""
+                })
+
+        progress(0.9, desc="Generating summary...")
+        time.sleep(0.2)
+
+        # Create results dataframe
+        results_df = pd.DataFrame(detailed_results)
+
+        # Generate summary statistics
+        valid_scores = [s for s in results if s > 0]
+        if valid_scores:
+            avg_score = sum(valid_scores) / len(valid_scores)
+            min_score = min(valid_scores)
+            max_score = max(valid_scores)
 
+            if avg_score >= 0.85:
+                summary = "Excellent performance across inputs"
+            elif avg_score >= 0.70:
+                summary = "Good performance with room for minor improvements"
+            elif avg_score >= 0.50:
+                summary = "Adequate performance but needs refinement"
+            else:
+                summary = "Significant improvements needed"
 
+            # Format summary
+            summary_text = (
+                f"Processed {len(results)} entries ({len(valid_scores)} successful)\n"
+                f"Average Hybrid Score: {avg_score:.4f}\n"
+                f"Range: {min_score:.4f} - {max_score:.4f}\n\n"
+                f"{summary}"
+            )
 
+            # Create metrics table for summary
+            metrics_table = [
+                ["Metric", "Value"],
+                ["Entries Processed", f"{len(results)}"],
+                ["Successful Entries", f"{len(valid_scores)}"],
+                ["Average Score", f"{avg_score:.4f}"],
+                ["Best Score", f"{max_score:.4f}"],
+                ["Worst Score", f"{min_score:.4f}"],
+                ["Overall Assessment", summary]
+            ]
 
+            return (
+                "Batch processing complete. Use the 'Show Details' button to see individual results.",
+                metrics_table,
+                summary_text
+            )
+        else:
+            return (
+                "No successful evaluations. Check your API configuration and input data.",
+                [["Error", "All evaluations failed"]],
+                "Error: No successful evaluations. Check your API configuration and input data."
+            )
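In the batch branch above, failed rows are recorded as 0.0 and then excluded by `valid_scores = [s for s in results if s > 0]` before averaging, so error rows never drag the summary down (note that a genuine score of exactly 0.0 would also be excluded). The same logic, pulled out as a hypothetical standalone helper for illustration (not part of this commit):

    def summarize_scores(results: list[float]) -> str:
        # Scores of 0.0 mark rows that failed to evaluate and are skipped.
        valid = [s for s in results if s > 0]
        if not valid:
            return "No successful evaluations."
        avg = sum(valid) / len(valid)
        return (f"Processed {len(results)} entries ({len(valid)} successful); "
                f"average {avg:.4f}, range {min(valid):.4f}-{max(valid):.4f}")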
 
+def show_detailed_results(input_text, file_upload, model_choice, prompt_choice, progress=gr.Progress()):
     """Show detailed results for batch processing"""
     if not file_upload:
         return "No file uploaded for batch processing."
 
+    progress(0.1, desc="Reading file...")
+    time.sleep(0.1)
+
     # Read the file
     df = pd.read_csv(file_upload.name)
     text_column = df.columns[0]
 
+    progress(0.3, desc="Determining model provider...")
+    time.sleep(0.1)
+
     # Determine model provider
     if model_choice == "Gemini":
         model_provider = GeminiProvider("gemini-1.5-flash-latest")
 
     else: # Llama-3-8b
         model_provider = GroqProvider("llama3-8b-8192")
 
+    progress(0.5, desc="Getting prompt template...")
+    time.sleep(0.1)
+
     # Get prompt template
     prompt_template = PROMPT_TEMPLATES[prompt_choice]
 
+    progress(0.7, desc="Processing entries...")
+    time.sleep(0.1)
+
     # Process each entry
     results = []
+    for i, row in enumerate(df.iterrows()):
+        _, row = row # Unpack the tuple
         text = str(row[text_column])
         try:
             result = evaluate_text(text, model_provider, prompt_template)
 
                 "Interpretation": "Processing error",
                 "Candidate Text": ""
             })
+        progress(0.7 + (i + 1) / len(df) * 0.3, desc=f"Processing entry {i+1}/{len(df)}")
 
+    progress(1.0, desc="Completed!")
     return gr.Dataframe(value=pd.DataFrame(results))
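One detail in the loop added above: `df.iterrows()` yields `(index, Series)` tuples, which the new code unpacks on a separate line with `_, row = row`. An equivalent, slightly tidier form unpacks in the for statement itself (illustrative only, with stand-in data; not what the commit uses):

    import pandas as pd

    df = pd.DataFrame({"text": ["first example", "second example"]})  # stand-in data
    text_column = df.columns[0]
    for i, (_, row) in enumerate(df.iterrows()):
        print(i, str(row[text_column]))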
 
 # Create Gradio interface
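The trailing context line above introduces the interface-construction code, which sits outside this diff. Because `progress` now defaults to `gr.Progress()`, it is not listed among the event inputs; Gradio injects it when the handler fires. A sketch of how such wiring could look, assuming app.py's `process_input` and `PROMPT_TEMPLATES` are in scope, with all component names and labels invented for illustration:

    import gradio as gr

    with gr.Blocks() as demo:
        # Component names/labels are hypothetical; only the handler signature comes from app.py.
        input_text = gr.Textbox(label="Input text")
        file_upload = gr.File(label="Or upload a CSV (first column = text)")
        model_choice = gr.Dropdown(["Gemini", "Llama-3-8b"], label="Model")  # app.py offers more choices
        prompt_choice = gr.Dropdown(list(PROMPT_TEMPLATES), label="Prompt")
        candidate_out = gr.Textbox(label="Rewritten text")
        metrics_out = gr.Dataframe(label="Metrics")
        summary_out = gr.Textbox(label="Summary")

        gr.Button("Evaluate").click(
            process_input,  # progress=gr.Progress() is injected by Gradio, not passed here
            inputs=[input_text, file_upload, model_choice, prompt_choice],
            outputs=[candidate_out, metrics_out, summary_out],
        )

    # demo.launch()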