Spaces:

alx-d
/

PhiRAG

Running

App Files Files Community

PhiRAG / test_filename_generation.py

alx-d

Upload folder using huggingface_hub

f840733 verified about 2 months ago

raw

history blame contribute delete

4.19 kB

	#!/usr/bin/env python3
	"""
	Test script to verify the new CSV filename generation functionality
	"""

	import sys
	import os
	sys.path.append(os.path.dirname(os.path.abspath(__file__)))

	from advanced_rag import get_short_embedding_name, get_short_llm_name, get_varied_parameter

	def test_embedding_names():
	    """Report how ``get_short_embedding_name`` shortens sample model labels.

	    Each case pairs a full embedding-model label with the short name the
	    helper is expected to produce.  One line is printed per case, marked
	    with a check or a cross; a mismatch is only reported, never raised.
	    """
	    cases = [
	        ("🤗 sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast)", "MiniLM"),
	        ("🤗 BAAI/bge-base-en-v1.5 (768 dim, excellent)", "BGE-Base"),
	        ("🟦 Qwen/Qwen3-Embedding-8B (1024 dim, advanced)", "Qwen3-8B"),
	        ("sentence-transformers/all-mpnet-base-v2", "MPNet"),
	        ("unknown-model", "unknown"),
	    ]

	    print("Testing embedding name generation:")
	    for label, wanted in cases:
	        actual = get_short_embedding_name(label)
	        # Start from "failed" and upgrade the mark only on an exact match.
	        mark = "✗"
	        if actual == wanted:
	            mark = "✓"
	        print(f" {mark} {label} -> {actual} (expected: {wanted})")

	def test_llm_names():
	    """Report how ``get_short_llm_name`` shortens sample LLM labels.

	    Each case pairs a full LLM label with the short name the helper is
	    expected to produce.  One line is printed per case, marked with a
	    check or a cross; a mismatch is only reported, never raised.
	    """
	    cases = [
	        ("🇪🇺 Mistral-API", "Mistral"),
	        ("🇺🇸 Remote Meta-Llama-3", "Llama3"),
	        ("🇺🇸 GPT-4o", "GPT4o"),
	        ("mistral-small-latest", "Mistral"),
	        ("meta-llama/Meta-Llama-3-8B-Instruct", "Llama3"),
	        ("unknown-model", "unknown"),
	    ]

	    print("\nTesting LLM name generation:")
	    for label, wanted in cases:
	        actual = get_short_llm_name(label)
	        # Start from "failed" and upgrade the mark only on an exact match.
	        mark = "✗"
	        if actual == wanted:
	            mark = "✓"
	        print(f" {mark} {label} -> {actual} (expected: {wanted})")

	def test_varied_parameter():
	    """Report what ``get_varied_parameter`` returns for sample configurations.

	    Every configuration maps the four parameter names to either
	    ``"Constant"`` or a ``"Whole range N values"`` setting.  One line is
	    printed per case, marked with a check or a cross; a mismatch is only
	    reported, never raised.
	    """
	    # Baseline with every parameter held constant.  Each case overrides
	    # existing keys only, so key order (and the printed dict repr) is the
	    # same as if the dict had been written out in full.
	    all_constant = {
	        "temperature": "Constant",
	        "top_p": "Constant",
	        "top_k": "Constant",
	        "bm25": "Constant",
	    }
	    cases = [
	        (dict(all_constant), "None"),
	        ({**all_constant, "temperature": "Whole range 3 values"}, "temperature"),
	        ({**all_constant, "top_p": "Whole range 5 values"}, "top_p"),
	        (
	            {
	                **all_constant,
	                "temperature": "Whole range 3 values",
	                "top_p": "Whole range 5 values",
	            },
	            "Multi",
	        ),
	        ({**all_constant, "bm25": "Whole range 7 values"}, "bm25"),
	    ]

	    print("\nTesting varied parameter detection:")
	    for config, wanted in cases:
	        actual = get_varied_parameter(config)
	        # Start from "failed" and upgrade the mark only on an exact match.
	        mark = "✗"
	        if actual == wanted:
	            mark = "✓"
	        print(f" {mark} {config} -> {actual} (expected: {wanted})")

	def test_filename_generation():
	    """Report whether assembled batch CSV filenames match the expected ones.

	    For each case the embedding and LLM labels are shortened with
	    ``get_short_embedding_name`` / ``get_short_llm_name`` (an empty label
	    becomes ``"Unknown"``, an empty parameter becomes ``"None"``) and
	    combined with a fixed timestamp into
	    ``batch_<embedding>_<llm>_<param>_<timestamp>.csv``.  The generated
	    and expected names are printed; a mismatch is only reported, never
	    raised.

	    NOTE(review): the filename is assembled here in the test rather than
	    obtained from the application code -- confirm the pattern matches the
	    production format.
	    """
	    # Fixed timestamp so the expected filenames are deterministic.
	    timestamp = "20241201_120000"

	    test_cases = [
	        ("🤗 sentence-transformers/all-MiniLM-L6-v2 (384 dim, fast)", "🇪🇺 Mistral-API", "temperature", "batch_MiniLM_Mistral_temperature_20241201_120000.csv"),
	        ("🤗 BAAI/bge-base-en-v1.5 (768 dim, excellent)", "🇺🇸 Remote Meta-Llama-3", "top_p", "batch_BGE-Base_Llama3_top_p_20241201_120000.csv"),
	        ("🟦 Qwen/Qwen3-Embedding-8B (1024 dim, advanced)", "🇺🇸 GPT-4o", "Multi", "batch_Qwen3-8B_GPT4o_Multi_20241201_120000.csv"),
	        ("", "", "None", "batch_Unknown_Unknown_None_20241201_120000.csv"),
	    ]

	    print("\nTesting complete filename generation:")
	    for embedding, llm, param, expected in test_cases:
	        # Empty inputs fall back to placeholders without calling the helpers.
	        short_embedding = get_short_embedding_name(embedding) if embedding else "Unknown"
	        short_llm = get_short_llm_name(llm) if llm else "Unknown"
	        short_param = param or "None"

	        filename = f"batch_{short_embedding}_{short_llm}_{short_param}_{timestamp}.csv"
	        status = "✓" if filename == expected else "✗"
	        print(f" {status} Generated: {filename}")
	        print(f" Expected: {expected}")

	if __name__ == "__main__":
	    # Run every check in a fixed order; each one prints its own report.
	    print("Testing CSV filename generation functionality\n")
	    for run_check in (
	        test_embedding_names,
	        test_llm_names,
	        test_varied_parameter,
	        test_filename_generation,
	    ):
	        run_check()
	    print("\nTest completed!")