Sa-m committed
Commit f010cb1 · verified · 1 Parent(s): 8fb1201

Update app.py

Files changed (1)
  1. app.py +12 -13
app.py CHANGED
@@ -403,17 +403,17 @@ def process_input(input_mode: str, reference_text: str, candidate_text: str, mod
     time.sleep(0.1)
     cleaned_candidate = clean_text(candidate_text)

-    # Format metrics for display
+    # Format metrics for display - ONLY SHOWING NORMALIZED SCORES AND HYBRID SCORE
     metrics_table = [
-        ["Metric", "Raw Score", "Normalized"],
-        ["AnswerRelevancy", f"{result['metrics']['AnswerRelevancy']:.4f}", f"{result['normalized']['AnswerRelevancy']:.4f}"],
-        ["Faithfulness", f"{result['metrics']['Faithfulness']:.4f}", f"{result['normalized']['Faithfulness']:.4f}"],
-        ["GEval", f"{result['metrics']['GEval']:.4f}", f"{result['normalized']['GEval']:.4f}"],
-        ["BERTScore", f"{result['metrics']['BERTScore']:.4f}", f"{result['normalized']['BERTScore']:.4f}"],
-        ["ROUGE", f"{result['metrics']['ROUGE']:.4f}", f"{result['normalized']['ROUGE']:.4f}"],
-        ["BLEU", f"{result['metrics']['BLEU']:.4f}", f"{result['normalized']['BLEU']:.4f}"],
-        ["METEOR", f"{result['metrics']['METEOR']:.4f}", f"{result['normalized']['METEOR']:.4f}"],
-        ["Weighted Score", f"{result['weighted_score']:.4f}", "N/A"]
+        ["Metric", "Normalized Score"],
+        ["AnswerRelevancy", f"{result['normalized']['AnswerRelevancy']:.4f}"],
+        ["Faithfulness", f"{result['normalized']['Faithfulness']:.4f}"],
+        ["GEval", f"{result['normalized']['GEval']:.4f}"],
+        ["BERTScore", f"{result['normalized']['BERTScore']:.4f}"],
+        ["ROUGE", f"{result['normalized']['ROUGE']:.4f}"],
+        ["BLEU", f"{result['normalized']['BLEU']:.4f}"],
+        ["METEOR", f"{result['normalized']['METEOR']:.4f}"],
+        ["Hybrid Score", f"{result['weighted_score']:.4f}"]
     ]

     return (
@@ -435,7 +435,7 @@ def load_example():
         candidate_text  # candidate_text
     )

-# Create Gradio interface
+
 with gr.Blocks(title="LLM Evaluation Framework", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📊 LLM Evaluation Framework for Professional Content Rewriting")
     gr.Markdown("Evaluate the quality of LLM-generated content using multiple metrics with proper normalization.")
@@ -571,10 +571,9 @@ with gr.Blocks(title="LLM Evaluation Framework", theme=gr.themes.Soft()) as demo
     | **METEOR** | Linguistic quality with synonyms | How natural does the cleaned output read? |
     """)

-# Launch the app
+
 if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
-        share=True
     )
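
For context on what this commit changes: metrics_table now has two columns (metric name and normalized score) instead of three, the raw scores are dropped, and the aggregate row is relabelled from "Weighted Score" to "Hybrid Score". Below is a minimal sketch of the resulting table shape, assuming a result dict like the one referenced in the diff; the metric values and the equal-weight hybrid score are illustrative only, since the actual normalization and weighting logic lives elsewhere in app.py and is not part of this diff.

# Minimal sketch: build the two-column table the updated code displays.
# The numbers and the equal-weight hybrid score below are assumptions for illustration.
result = {
    "normalized": {
        "AnswerRelevancy": 0.91,
        "Faithfulness": 0.88,
        "GEval": 0.76,
        "BERTScore": 0.83,
        "ROUGE": 0.42,
        "BLEU": 0.31,
        "METEOR": 0.55,
    },
}
# Hypothetical equal weighting; app.py computes its own weighted_score upstream.
result["weighted_score"] = sum(result["normalized"].values()) / len(result["normalized"])

metrics_table = (
    [["Metric", "Normalized Score"]]
    + [[name, f"{score:.4f}"] for name, score in result["normalized"].items()]
    + [["Hybrid Score", f"{result['weighted_score']:.4f}"]]
)

for metric, score in metrics_table[1:]:
    print(f"{metric:<16} {score}")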