# Cohere chat-completion and Embed v4 integration tests for litellm.
# -- Test preamble ----------------------------------------------------------
# Env + path setup must run before `import litellm` so the local package and
# .env-provided API keys are picked up. Duplicate `import os` and a repeated
# `from litellm import ...` line were removed.
import io
import json
import os
import sys
import traceback

from dotenv import load_dotenv

# Load provider credentials (e.g. COHERE_API_KEY) from a local .env file.
load_dotenv()

# Adds the parent directory to the system path so the in-repo litellm package
# shadows any installed copy; this must precede `import litellm`.
sys.path.insert(0, os.path.abspath("../.."))

import pytest
from unittest.mock import AsyncMock, patch

import litellm
from litellm import RateLimitError, Timeout, completion, completion_cost, embedding
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

# Retry transient provider failures up to 3 times for every call in this file.
litellm.num_retries = 3
@pytest.mark.parametrize("stream", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_cohere_citations(stream):
    """Verify cohere_chat returns citations for document-grounded answers.

    Runs once streaming and once non-streaming. The parametrize/asyncio marks
    were missing: without them pytest errors on an unknown `stream` fixture
    and never awaits the coroutine.
    """
    try:
        litellm.set_verbose = True
        messages = [
            {
                "role": "user",
                "content": "Which penguins are the tallest?",
            },
        ]
        response = await litellm.acompletion(
            model="cohere_chat/command-r",
            messages=messages,
            documents=[
                {"title": "Tall penguins", "text": "Emperor penguins are the tallest."},
                {
                    "title": "Penguin habitats",
                    "text": "Emperor penguins only live in Antarctica.",
                },
            ],
            stream=stream,
        )
        if stream:
            # At least one streamed chunk should carry citation data.
            citations_chunk = False
            async for chunk in response:
                print("received chunk", chunk)
                if "citations" in chunk:
                    citations_chunk = True
                    break
            assert citations_chunk
        else:
            assert response.citations is not None
    except litellm.ServiceUnavailableError:
        # Provider-side outages are tolerated; they are not a test failure.
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_completion_cohere_command_r_plus_function_call():
    """Exercise command-r-plus tool calling end-to-end.

    Turn 1 should produce a tool invocation; turn 2 feeds the tool result
    back (forced single-step) and expects a direct answer.
    """
    litellm.set_verbose = True
    convo = [
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ]
    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
    try:
        # First turn: no max_tokens cap; the model should invoke the tool.
        first_response = completion(
            model="command-r-plus",
            messages=convo,
            tools=[weather_tool],
            tool_choice="auto",
        )
        print(first_response)
        tool_call = first_response.choices[0].message.tool_calls[0]
        assert isinstance(tool_call.function.name, str)
        assert isinstance(tool_call.function.arguments, str)
        # Record the assistant's tool invocation in the conversation.
        convo.append(first_response.choices[0].message.model_dump())
        # Submit a canned tool result in the OpenAI tool-message format.
        tool_output = (
            '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
        )
        convo.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": tool_call.function.name,
                "content": tool_output,
            }
        )
        # Second turn: Cohere should deduce the answer from the tool result.
        followup_response = completion(
            model="command-r-plus",
            messages=convo,
            tools=[weather_tool],
            tool_choice="auto",
            force_single_step=True,
        )
        print(followup_response)
    except litellm.Timeout:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# @pytest.mark.skip(reason="flaky test, times out frequently")
def test_completion_cohere():
    """Smoke-test a plain command-r completion with mixed message formats
    (string content plus content-block lists)."""
    prompt_messages = [
        {"role": "system", "content": "You're a good bot"},
        {"role": "assistant", "content": [{"text": "2", "type": "text"}]},
        {"role": "assistant", "content": [{"text": "3", "type": "text"}]},
        {
            "role": "user",
            "content": "Hey",
        },
    ]
    try:
        result = completion(model="command-r", messages=prompt_messages)
        print(result)
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")
# FYI - cohere_chat looks quite unstable, even when testing locally
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_cohere(sync_mode):
    """Basic cohere_chat completion via both the sync and async client paths.

    The parametrize/asyncio marks were missing: pytest would error on an
    unknown `sync_mode` fixture and never await the coroutine.
    """
    try:
        litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        if sync_mode is False:
            response = await litellm.acompletion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
            )
        else:
            response = completion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
            )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_cohere_stream(sync_mode):
    """Streamed cohere_chat completion in both sync and async modes.

    Drains the stream and tolerates provider connection errors. The missing
    parametrize/asyncio marks were added, and the unused `as e` binding on
    the APIConnectionError handler was dropped.
    """
    try:
        litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        if sync_mode is False:
            response = await litellm.acompletion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
                stream=True,
            )
            print("async cohere stream response", response)
            async for chunk in response:
                print(chunk)
        else:
            response = completion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
                stream=True,
            )
            print(response)
            for chunk in response:
                print(chunk)
    except litellm.APIConnectionError:
        # Transient connectivity problems are not a test failure.
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.asyncio
async def test_cohere_request_body_with_allowed_params():
    """
    Test to validate that when allowed_openai_params is provided, the request body contains
    the correct response_format and reasoning_effort values.

    The missing @pytest.mark.asyncio was added — without it the coroutine is
    never awaited and the assertions never run.
    """
    # Define test parameters
    test_response_format = {"type": "json"}
    test_reasoning_effort = "low"
    test_tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_time",
                "description": "Get the current time in a given location.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city name, e.g. San Francisco",
                        }
                    },
                    "required": ["location"],
                },
            },
        }
    ]

    client = AsyncHTTPHandler()

    # Mock the post method so no real network call is made.
    with patch.object(client, "post", new=AsyncMock()) as mock_post:
        try:
            await litellm.acompletion(
                model="cohere/command",
                messages=[{"content": "what llm are you", "role": "user"}],
                allowed_openai_params=["tools", "response_format", "reasoning_effort"],
                response_format=test_response_format,
                reasoning_effort=test_reasoning_effort,
                tools=test_tools,
                client=client,
            )
        except Exception:
            pass  # We only care about the request body validation

        # Verify the API call was made
        mock_post.assert_called_once()

        # Get and parse the request body
        request_data = json.loads(mock_post.call_args.kwargs["data"])
        print(f"request_data: {request_data}")

        # Validate request contains our specified parameters and that the
        # control key itself was not forwarded to the provider.
        assert "allowed_openai_params" not in request_data
        assert request_data["response_format"] == test_response_format
        assert request_data["reasoning_effort"] == test_reasoning_effort
def test_cohere_embedding_outout_dimensions():
    """embed-v4.0 honors the `dimensions` parameter (512-wide vectors).

    NOTE(review): "outout" in the test name looks like a typo for "output";
    kept as-is so selection by test id is unaffected.
    """
    litellm._turn_on_debug()
    result = embedding(
        model="cohere/embed-v4.0",
        input="Hello, world!",
        dimensions=512,
    )
    print(f"response: {result}\n")
    assert len(result.data[0]["embedding"]) == 512
# Comprehensive Cohere Embed v4 tests
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cohere_embed_v4_basic_text(sync_mode):
    """Test basic text embedding functionality with Cohere Embed v4.

    Missing parametrize/asyncio marks added so `sync_mode` resolves and the
    coroutine is actually awaited.
    """
    try:
        data = {
            "model": "cohere/embed-v4.0",
            "input": ["Hello world!", "This is a test sentence."],
            "input_type": "search_document",
        }
        if sync_mode:
            response = embedding(**data)
        else:
            response = await litellm.aembedding(**data)

        # Validate response structure: one vector per input string.
        assert response.model is not None
        assert len(response.data) == 2
        assert response.data[0]["object"] == "embedding"
        assert len(response.data[0]["embedding"]) > 0
        assert response.usage.prompt_tokens > 0
        assert isinstance(response.usage, litellm.Usage)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cohere_embed_v4_with_dimensions(sync_mode):
    """Test Cohere Embed v4 with specific dimension parameter.

    Missing parametrize/asyncio marks added.
    """
    try:
        data = {
            "model": "cohere/embed-v4.0",
            "input": ["Test with custom dimensions"],
            "dimensions": 512,
            "input_type": "search_query",
        }
        if sync_mode:
            response = embedding(**data)
        else:
            response = await litellm.aembedding(**data)

        # Validate the requested output dimensionality is honored.
        assert len(response.data[0]["embedding"]) == 512
        assert isinstance(response.usage, litellm.Usage)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cohere_embed_v4_image_embedding(sync_mode):
    """Test Cohere Embed v4 image embedding functionality (multimodal).

    Missing parametrize/asyncio marks added.
    """
    try:
        import base64

        # 1x1 pixel red PNG (base64 encoded)
        test_image_data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00\x0cIDATx\x9cc\xf8\x00\x00\x00\x01\x00\x01\x00\x00\x00\x00'
        test_image_b64 = base64.b64encode(test_image_data).decode('utf-8')

        data = {
            "model": "cohere/embed-v4.0",
            "input": [test_image_b64],
            "input_type": "image",
        }
        if sync_mode:
            response = embedding(**data)
        else:
            response = await litellm.aembedding(**data)

        # Validate response structure for image embedding
        assert response.model is not None
        assert len(response.data) == 1
        assert response.data[0]["object"] == "embedding"
        assert len(response.data[0]["embedding"]) > 0
        assert isinstance(response.usage, litellm.Usage)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize(
    # Documented Cohere embedding input types for text inputs.
    "input_type",
    ["search_document", "search_query", "classification", "clustering"],
)
@pytest.mark.asyncio
async def test_cohere_embed_v4_input_types(input_type):
    """Test Cohere Embed v4 with different input types.

    `input_type` had no parametrize mark and the async test had no asyncio
    mark — both added so the test actually runs for each type.
    """
    try:
        response = await litellm.aembedding(
            model="cohere/embed-v4.0",
            input=[f"Test text for {input_type}"],
            input_type=input_type,
        )
        assert response.model is not None
        assert len(response.data) == 1
        assert response.data[0]["object"] == "embedding"
        assert len(response.data[0]["embedding"]) > 0
        assert isinstance(response.usage, litellm.Usage)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_cohere_embed_v4_encoding_format():
    """embed-v4.0 with encoding_format="float" yields float-valued vectors."""
    try:
        result = embedding(
            model="cohere/embed-v4.0",
            input=["Test encoding format"],
            encoding_format="float",
        )
        assert result.model is not None
        assert len(result.data) == 1
        first_item = result.data[0]
        assert first_item["object"] == "embedding"
        vector = first_item["embedding"]
        assert len(vector) > 0
        # Every component must be a Python float when encoding_format="float".
        assert all(isinstance(component, float) for component in vector)
        assert isinstance(result.usage, litellm.Usage)
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")
def test_cohere_embed_v4_error_handling():
    """Test error handling for Cohere Embed v4 with invalid inputs.

    Rewritten with idiomatic `pytest.raises` instead of the hand-rolled
    try/`pytest.fail` pattern — same contract: empty or None input must raise.
    """
    # Empty input list must raise rather than silently succeed.
    with pytest.raises(Exception):
        embedding(
            model="cohere/embed-v4.0",
            input=[],  # Empty input
        )

    # None input must raise as well.
    with pytest.raises(Exception):
        embedding(
            model="cohere/embed-v4.0",
            input=None,
        )
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cohere_embed_v4_multiple_texts(sync_mode):
    """Test Cohere Embed v4 with multiple text inputs.

    Missing parametrize/asyncio marks added.
    """
    try:
        texts = [
            "The quick brown fox jumps over the lazy dog",
            "Machine learning is transforming the world",
            "Python is a versatile programming language",
            "Natural language processing enables human-computer interaction",
        ]
        data = {
            "model": "cohere/embed-v4.0",
            "input": texts,
            "input_type": "search_document",
        }
        if sync_mode:
            response = embedding(**data)
        else:
            response = await litellm.aembedding(**data)

        # One embedding per input, indexed in order, all float-valued.
        assert response.model is not None
        assert len(response.data) == len(texts)
        for i, data_item in enumerate(response.data):
            assert data_item["object"] == "embedding"
            assert data_item["index"] == i
            assert len(data_item["embedding"]) > 0
            assert all(isinstance(x, float) for x in data_item["embedding"])
        assert isinstance(response.usage, litellm.Usage)
        assert response.usage.prompt_tokens > 0
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_cohere_embed_v4_with_optional_params():
    """embed-v4.0 with input_type, custom dimensions, and float encoding
    combined — the 256-wide float vector must come back intact."""
    try:
        result = embedding(
            model="cohere/embed-v4.0",
            input=["Test with optional parameters"],
            input_type="search_query",
            dimensions=256,
            encoding_format="float",
        )
        assert result.model is not None
        assert len(result.data) == 1
        entry = result.data[0]
        assert entry["object"] == "embedding"
        vector = entry["embedding"]
        assert len(vector) == 256  # Custom dimensions honored
        assert all(isinstance(value, float) for value in vector)
        assert isinstance(result.usage, litellm.Usage)
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")