Spaces:

DesertWolf
/

test3

Paused

App Files Files Community

test3 / tests /test_end_users.py

DesertWolf

Upload folder using huggingface_hub

447ebeb verified 5 months ago

raw

history blame contribute delete

8.4 kB

	# What is this?
	## Unit tests for the /end_users/* endpoints
	import pytest
	import asyncio
	import aiohttp
	import time
	import uuid
	from openai import AsyncOpenAI
	from typing import Optional

	"""
	- `/end_user/new`
	- `/end_user/info`
	"""


	async def chat_completion_with_headers(session, key, model="gpt-4"):
	url = "http://0.0.0.0:4000/chat/completions"
	headers = {
	"Authorization": f"Bearer {key}",
	"Content-Type": "application/json",
	}
	data = {
	"model": model,
	"messages": [
	{"role": "system", "content": "You are a helpful assistant."},
	{"role": "user", "content": "Hello!"},
	],
	}

	async with session.post(url, headers=headers, json=data) as response:
	status = response.status
	response_text = await response.text()

	print(response_text)
	print()

	if status != 200:
	raise Exception(f"Request did not return a 200 status code: {status}")

	response_header_check(
	response
	) # calling the function to check response headers

	raw_headers = response.raw_headers
	raw_headers_json = {}

	for (
	item
	) in (
	response.raw_headers
	): # ((b'date', b'Fri, 19 Apr 2024 21:17:29 GMT'), (), )
	raw_headers_json[item[0].decode("utf-8")] = item[1].decode("utf-8")

	return raw_headers_json


	async def generate_key(
	session,
	i,
	budget=None,
	budget_duration=None,
	models=["azure-models", "gpt-4", "dall-e-3"],
	max_parallel_requests: Optional[int] = None,
	user_id: Optional[str] = None,
	team_id: Optional[str] = None,
	calling_key="sk-1234",
	):
	url = "http://0.0.0.0:4000/key/generate"
	headers = {
	"Authorization": f"Bearer {calling_key}",
	"Content-Type": "application/json",
	}
	data = {
	"models": models,
	"aliases": {"mistral-7b": "gpt-3.5-turbo"},
	"duration": None,
	"max_budget": budget,
	"budget_duration": budget_duration,
	"max_parallel_requests": max_parallel_requests,
	"user_id": user_id,
	"team_id": team_id,
	}

	print(f"data: {data}")

	async with session.post(url, headers=headers, json=data) as response:
	status = response.status
	response_text = await response.text()

	print(f"Response {i} (Status code: {status}):")
	print(response_text)
	print()

	if status != 200:
	raise Exception(f"Request {i} did not return a 200 status code: {status}")

	return await response.json()


	async def new_end_user(
	session,
	i,
	user_id=str(uuid.uuid4()),
	model_region=None,
	default_model=None,
	budget_id=None,
	):
	url = "http://0.0.0.0:4000/end_user/new"
	headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
	data = {
	"user_id": user_id,
	"allowed_model_region": model_region,
	"default_model": default_model,
	}

	if budget_id is not None:
	data["budget_id"] = budget_id
	print("end user data: {}".format(data))

	async with session.post(url, headers=headers, json=data) as response:
	status = response.status
	response_text = await response.text()

	print(f"Response {i} (Status code: {status}):")
	print(response_text)
	print()

	if status != 200:
	raise Exception(f"Request {i} did not return a 200 status code: {status}")

	return await response.json()


	async def new_budget(session, i, budget_id=None):
	url = "http://0.0.0.0:4000/budget/new"
	headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
	data = {
	"budget_id": budget_id,
	"tpm_limit": 2,
	}

	async with session.post(url, headers=headers, json=data) as response:
	status = response.status
	response_text = await response.text()

	print(f"Response {i} (Status code: {status}):")
	print(response_text)
	print()


	@pytest.mark.asyncio
	async def test_end_user_new():
	"""
	Make 20 parallel calls to /user/new. Assert all worked.
	"""
	async with aiohttp.ClientSession() as session:
	tasks = [new_end_user(session, i, str(uuid.uuid4())) for i in range(1, 11)]
	await asyncio.gather(*tasks)


	@pytest.mark.asyncio
	async def test_aaaend_user_specific_region():
	"""
	- Specify region user can make calls in
	- Make a generic call
	- assert returned api base is for model in region

	Repeat 3 times
	"""
	key: str = ""
	## CREATE USER ##
	async with aiohttp.ClientSession() as session:
	end_user_obj = await new_end_user(
	session=session,
	i=0,
	user_id=str(uuid.uuid4()),
	model_region="eu",
	)

	## MAKE CALL ##
	key_gen = await generate_key(
	session=session, i=0, models=["gpt-3.5-turbo-end-user-test"]
	)

	key = key_gen["key"]

	for _ in range(3):
	client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000", max_retries=0)

	print("SENDING USER PARAM - {}".format(end_user_obj["user_id"]))
	result = await client.chat.completions.with_raw_response.create(
	model="gpt-3.5-turbo-end-user-test",
	messages=[{"role": "user", "content": "Hey!"}],
	user=end_user_obj["user_id"],
	)

	assert result.headers.get("x-litellm-model-region") == "eu"


	@pytest.mark.asyncio
	async def test_enduser_tpm_limits_non_master_key():
	"""
	1. budget_id = Create Budget with tpm_limit = 10
	2. create end_user with budget_id
	3. Make /chat/completions calls
	4. Sleep 1 second
	4. Make /chat/completions call -> expect this to fail because rate limit hit
	"""
	async with aiohttp.ClientSession() as session:
	# create a budget with budget_id = "free-tier"
	budget_id = f"free-tier-{uuid.uuid4()}"
	await new_budget(session, 0, budget_id=budget_id)
	await asyncio.sleep(2)

	end_user_id = str(uuid.uuid4())

	await new_end_user(
	session=session, i=0, user_id=end_user_id, budget_id=budget_id
	)

	## MAKE CALL ##
	key_gen = await generate_key(session=session, i=0, models=[])

	key = key_gen["key"]

	# chat completion 1
	client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000", max_retries=0)

	# chat completion 2
	passed = 0
	for _ in range(10):
	try:
	result = await client.chat.completions.create(
	model="fake-openai-endpoint",
	messages=[{"role": "user", "content": "Hey!"}],
	user=end_user_id,
	)
	passed += 1
	except Exception:
	pass
	print("Passed requests=", passed)

	assert (
	passed < 5
	), f"Sent 10 requests and end-user has tpm_limit of 2. Number requests passed: {passed}. Expected less than 5 to pass"


	@pytest.mark.asyncio
	async def test_enduser_tpm_limits_with_master_key():
	"""
	1. budget_id = Create Budget with tpm_limit = 10
	2. create end_user with budget_id
	3. Make /chat/completions calls
	4. Sleep 1 second
	4. Make /chat/completions call -> expect this to fail because rate limit hit
	"""
	async with aiohttp.ClientSession() as session:
	# create a budget with budget_id = "free-tier"
	budget_id = f"free-tier-{uuid.uuid4()}"
	await new_budget(session, 0, budget_id=budget_id)

	end_user_id = str(uuid.uuid4())

	await new_end_user(
	session=session, i=0, user_id=end_user_id, budget_id=budget_id
	)

	# chat completion 1
	client = AsyncOpenAI(
	api_key="sk-1234", base_url="http://0.0.0.0:4000", max_retries=0
	)

	# chat completion 2
	passed = 0
	for _ in range(10):
	try:
	result = await client.chat.completions.create(
	model="fake-openai-endpoint",
	messages=[{"role": "user", "content": "Hey!"}],
	user=end_user_id,
	)
	passed += 1
	except Exception:
	pass
	print("Passed requests=", passed)

	assert (
	passed < 5
	), f"Sent 10 requests and end-user has tpm_limit of 2. Number requests passed: {passed}. Expected less than 5 to pass"