# What is this?
## This tests if the proxy fallbacks work as expected
import pytest
import asyncio
import aiohttp
from tests.large_text import text
import time
from typing import Optional

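# NOTE (assumption): these tests expect a LiteLLM proxy already running at
# http://0.0.0.0:4000 with master key "sk-1234", and a proxy config defining
# the model aliases used below (e.g. "gpt-3.5-turbo", "fake-openai-endpoint-4",
# "badly-configured-openai-endpoint").
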
async def generate_key(
    session,
    i,
    models: list,
    calling_key="sk-1234",
):
    url = "http://0.0.0.0:4000/key/generate"
    headers = {
        "Authorization": f"Bearer {calling_key}",
        "Content-Type": "application/json",
    }
    data = {
        "models": models,
    }
    print(f"data: {data}")

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        print(f"Response {i} (Status code: {status}):")
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request {i} did not return a 200 status code: {status}")

        return await response.json()

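# NOTE: /key/generate returns a JSON body containing at least a "key" field
# (the generated virtual key); the tests below rely only on that field.
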
async def chat_completion(
    session,
    key: str,
    model: str,
    messages: list,
    return_headers: bool = False,
    extra_headers: Optional[dict] = None,
    **kwargs,
):
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    if extra_headers is not None:
        headers.update(extra_headers)
    data = {"model": model, "messages": messages, **kwargs}

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        print(response_text)
        print()

        if status != 200:
            if return_headers:
                return None, response.headers
            else:
                raise Exception(f"Request did not return a 200 status code: {status}")

        if return_headers:
            return await response.json(), response.headers
        else:
            return await response.json()

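# Any extra kwargs (e.g. `fallbacks`, `num_retries`, `mock_timeout`) are
# forwarded verbatim in the request body, where the proxy reads them as
# per-request routing parameters.
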
@pytest.mark.asyncio
async def test_chat_completion():
    """
    Make a chat completion call with a prompt larger than the model's context
    window. Expect the proxy's context-window fallback to handle it.
    """
    async with aiohttp.ClientSession() as session:
        model = "gpt-3.5-turbo"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        await chat_completion(
            session=session, key="sk-1234", model=model, messages=messages
        )

@pytest.mark.parametrize("has_access", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_client_fallbacks(has_access):
    """
    Make a chat completion call with client-side fallbacks. Expect the fallback
    to succeed only when the calling key has access to the fallback model.
    """
    async with aiohttp.ClientSession() as session:
        models = ["gpt-3.5-turbo"]

        if has_access:
            models.append("gpt-instruct")

        ## CREATE KEY WITH MODELS
        generated_key = await generate_key(session=session, i=0, models=models)
        calling_key = generated_key["key"]
        model = "gpt-3.5-turbo"
        messages = [
            {"role": "user", "content": "Who was Alexander?"},
        ]

        ## CALL PROXY
        try:
            await chat_completion(
                session=session,
                key=calling_key,
                model=model,
                messages=messages,
                mock_testing_fallbacks=True,
                fallbacks=["gpt-instruct"],
            )
            if not has_access:
                pytest.fail(
                    "Expected this to fail, submitted fallback model that key did not have access to"
                )
        except Exception as e:
            if has_access:
                pytest.fail("Expected this to work: {}".format(str(e)))

@pytest.mark.asyncio
async def test_chat_completion_with_retries():
    """
    Trigger a mock rate-limit error. Expect the proxy to retry and to report
    the retry counts in the response headers.
    """
    async with aiohttp.ClientSession() as session:
        model = "fake-openai-endpoint-4"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        response, headers = await chat_completion(
            session=session,
            key="sk-1234",
            model=model,
            messages=messages,
            mock_testing_rate_limit_error=True,
            return_headers=True,
        )
        print(f"headers: {headers}")
        assert headers["x-litellm-attempted-retries"] == "1"
        assert headers["x-litellm-max-retries"] == "50"

@pytest.mark.asyncio
async def test_chat_completion_with_fallbacks():
    """
    Call a badly configured endpoint with a fallback model specified. Expect
    the request to succeed via the fallback, reported in the response headers.
    """
    async with aiohttp.ClientSession() as session:
        model = "badly-configured-openai-endpoint"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        response, headers = await chat_completion(
            session=session,
            key="sk-1234",
            model=model,
            messages=messages,
            fallbacks=["fake-openai-endpoint-5"],
            return_headers=True,
        )
        print(f"headers: {headers}")
        assert headers["x-litellm-attempted-fallbacks"] == "1"

@pytest.mark.asyncio
async def test_chat_completion_with_timeout():
    """
    Call a model configured with `mock_timeout: true` and no request-level
    timeout. Expect the call to time out and the model-level timeout to be
    reported in the response headers.
    """
    async with aiohttp.ClientSession() as session:
        model = "fake-openai-endpoint-5"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        start_time = time.time()
        response, headers = await chat_completion(
            session=session,
            key="sk-1234",
            model=model,
            messages=messages,
            num_retries=0,
            mock_timeout=True,
            return_headers=True,
        )
        end_time = time.time()
        print(f"headers: {headers}")
        assert (
            headers["x-litellm-timeout"] == "1.0"
        )  # assert model-specific timeout used

@pytest.mark.asyncio
async def test_chat_completion_with_timeout_from_request():
    """
    Call a model with `mock_timeout: true` and a per-request
    `x-litellm-timeout` header. Expect the request-level timeout to override
    the model-level one in the response headers.
    """
    async with aiohttp.ClientSession() as session:
        model = "fake-openai-endpoint-5"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        extra_headers = {
            "x-litellm-timeout": "0.001",
        }
        start_time = time.time()
        response, headers = await chat_completion(
            session=session,
            key="sk-1234",
            model=model,
            messages=messages,
            num_retries=0,
            mock_timeout=True,
            extra_headers=extra_headers,
            return_headers=True,
        )
        end_time = time.time()
        print(f"headers: {headers}")
        assert (
            headers["x-litellm-timeout"] == "0.001"
        )  # assert request-specific timeout used

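# Taken together, the two timeout tests establish precedence: a per-request
# "x-litellm-timeout" header overrides the model-level timeout.
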
@pytest.mark.parametrize("has_access", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_client_fallbacks_with_custom_message(has_access):
    """
    Make a chat completion call with a client-side fallback that carries a
    custom message. Expect it to succeed only when the calling key has access
    to the fallback model.
    """
    async with aiohttp.ClientSession() as session:
        models = ["gpt-3.5-turbo"]

        if has_access:
            models.append("gpt-instruct")

        ## CREATE KEY WITH MODELS
        generated_key = await generate_key(session=session, i=0, models=models)
        calling_key = generated_key["key"]
        model = "gpt-3.5-turbo"
        messages = [
            {"role": "user", "content": "Who was Alexander?"},
        ]

        ## CALL PROXY
        try:
            await chat_completion(
                session=session,
                key=calling_key,
                model=model,
                messages=messages,
                mock_testing_fallbacks=True,
                fallbacks=[
                    {
                        "model": "gpt-instruct",
                        "messages": [
                            {
                                "role": "assistant",
                                "content": "This is a custom message",
                            }
                        ],
                    }
                ],
            )
            if not has_access:
                pytest.fail(
                    "Expected this to fail, submitted fallback model that key did not have access to"
                )
        except Exception as e:
            if has_access:
                pytest.fail("Expected this to work: {}".format(str(e)))

from openai import AsyncOpenAI

async def make_request(client: AsyncOpenAI, model: str) -> bool:
    try:
        await client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "Who was Alexander?"}],
        )
        return True
    except Exception as e:
        print(f"Error with {model}: {str(e)}")
        return False


async def run_good_model_test(client: AsyncOpenAI, num_requests: int) -> bool:
    tasks = [make_request(client, "good-model") for _ in range(num_requests)]
    good_results = await asyncio.gather(*tasks)
    return all(good_results)

@pytest.mark.asyncio
async def test_chat_completion_bad_and_good_model():
    """
    Prod test - ensure even if bad model is down, good model is still working.
    """
    client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
    num_requests = 100
    num_iterations = 3

    for iteration in range(num_iterations):
        print(f"\nIteration {iteration + 1}/{num_iterations}")
        start_time = time.time()

        # Fire and forget bad model requests
        for _ in range(num_requests):
            asyncio.create_task(make_request(client, "bad-model"))

        # Wait only for good model requests
        success = await run_good_model_test(client, num_requests)

        print(
            f"Iteration {iteration + 1}: {'✓' if success else '✗'} ({time.time() - start_time:.2f}s)"
        )
        assert success, "Not all good model requests succeeded"

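# A minimal sketch for running the prod test directly, outside pytest
# (assumption: the proxy described at the top of this file is running):
if __name__ == "__main__":
    asyncio.run(test_chat_completion_bad_and_good_model())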