# Cohere chat-completion and Embed v4 integration tests for litellm.
# -- Test preamble ----------------------------------------------------------
# Env + path setup must run before `import litellm` so the local package and
# .env-provided API keys are picked up. Duplicate `import os` and a repeated
# `from litellm import ...` line were removed.
import io
import json
import os
import sys
import traceback

from dotenv import load_dotenv

# Load provider credentials (e.g. COHERE_API_KEY) from a local .env file.
load_dotenv()

# Adds the parent directory to the system path so the in-repo litellm package
# shadows any installed copy; this must precede `import litellm`.
sys.path.insert(0, os.path.abspath("../.."))

import pytest
from unittest.mock import AsyncMock, patch

import litellm
from litellm import RateLimitError, Timeout, completion, completion_cost, embedding
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

# Retry transient provider failures up to 3 times for every call in this file.
litellm.num_retries = 3
@pytest.mark.parametrize("stream", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_cohere_citations(stream):
    """Verify cohere_chat returns citations for document-grounded answers.

    Runs once streaming and once non-streaming. The parametrize/asyncio marks
    were missing: without them pytest errors on an unknown `stream` fixture
    and never awaits the coroutine.
    """
    try:
        litellm.set_verbose = True
        messages = [
            {
                "role": "user",
                "content": "Which penguins are the tallest?",
            },
        ]
        response = await litellm.acompletion(
            model="cohere_chat/command-r",
            messages=messages,
            documents=[
                {"title": "Tall penguins", "text": "Emperor penguins are the tallest."},
                {
                    "title": "Penguin habitats",
                    "text": "Emperor penguins only live in Antarctica.",
                },
            ],
            stream=stream,
        )
        if stream:
            # At least one streamed chunk should carry citation data.
            citations_chunk = False
            async for chunk in response:
                print("received chunk", chunk)
                if "citations" in chunk:
                    citations_chunk = True
                    break
            assert citations_chunk
        else:
            assert response.citations is not None
    except litellm.ServiceUnavailableError:
        # Provider-side outages are tolerated; they are not a test failure.
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_completion_cohere_command_r_plus_function_call():
    """Exercise command-r-plus tool calling end-to-end.

    Turn 1 should produce a tool invocation; turn 2 feeds the tool result
    back (forced single-step) and expects a direct answer.
    """
    litellm.set_verbose = True
    convo = [
        {
            "role": "user",
            "content": "What's the weather like in Boston today in Fahrenheit?",
        }
    ]
    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
    try:
        # First turn: no max_tokens cap; the model should invoke the tool.
        first_response = completion(
            model="command-r-plus",
            messages=convo,
            tools=[weather_tool],
            tool_choice="auto",
        )
        print(first_response)
        tool_call = first_response.choices[0].message.tool_calls[0]
        assert isinstance(tool_call.function.name, str)
        assert isinstance(tool_call.function.arguments, str)
        # Record the assistant's tool invocation in the conversation.
        convo.append(first_response.choices[0].message.model_dump())
        # Submit a canned tool result in the OpenAI tool-message format.
        tool_output = (
            '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}'
        )
        convo.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": tool_call.function.name,
                "content": tool_output,
            }
        )
        # Second turn: Cohere should deduce the answer from the tool result.
        followup_response = completion(
            model="command-r-plus",
            messages=convo,
            tools=[weather_tool],
            tool_choice="auto",
            force_single_step=True,
        )
        print(followup_response)
    except litellm.Timeout:
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# @pytest.mark.skip(reason="flaky test, times out frequently")
def test_completion_cohere():
    """Smoke-test a plain command-r completion with mixed message formats
    (string content plus content-block lists)."""
    prompt_messages = [
        {"role": "system", "content": "You're a good bot"},
        {"role": "assistant", "content": [{"text": "2", "type": "text"}]},
        {"role": "assistant", "content": [{"text": "3", "type": "text"}]},
        {
            "role": "user",
            "content": "Hey",
        },
    ]
    try:
        result = completion(model="command-r", messages=prompt_messages)
        print(result)
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")
# FYI - cohere_chat looks quite unstable, even when testing locally
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_cohere(sync_mode):
    """Basic cohere_chat completion via both the sync and async client paths.

    The parametrize/asyncio marks were missing: pytest would error on an
    unknown `sync_mode` fixture and never await the coroutine.
    """
    try:
        litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        if sync_mode is False:
            response = await litellm.acompletion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
            )
        else:
            response = completion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
            )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_cohere_stream(sync_mode):
    """Streamed cohere_chat completion in both sync and async modes.

    Drains the stream and tolerates provider connection errors. The missing
    parametrize/asyncio marks were added, and the unused `as e` binding on
    the APIConnectionError handler was dropped.
    """
    try:
        litellm.set_verbose = True
        messages = [
            {"role": "system", "content": "You're a good bot"},
            {
                "role": "user",
                "content": "Hey",
            },
        ]
        if sync_mode is False:
            response = await litellm.acompletion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
                stream=True,
            )
            print("async cohere stream response", response)
            async for chunk in response:
                print(chunk)
        else:
            response = completion(
                model="cohere_chat/command-r",
                messages=messages,
                max_tokens=10,
                stream=True,
            )
            print(response)
            for chunk in response:
                print(chunk)
    except litellm.APIConnectionError:
        # Transient connectivity problems are not a test failure.
        pass
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.asyncio
async def test_cohere_request_body_with_allowed_params():
    """
    Test to validate that when allowed_openai_params is provided, the request body contains
    the correct response_format and reasoning_effort values.

    The missing @pytest.mark.asyncio was added — without it the coroutine is
    never awaited and the assertions never run.
    """
    # Define test parameters
    test_response_format = {"type": "json"}
    test_reasoning_effort = "low"
    test_tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_time",
                "description": "Get the current time in a given location.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city name, e.g. San Francisco",
                        }
                    },
                    "required": ["location"],
                },
            },
        }
    ]

    client = AsyncHTTPHandler()

    # Mock the post method so no real network call is made.
    with patch.object(client, "post", new=AsyncMock()) as mock_post:
        try:
            await litellm.acompletion(
                model="cohere/command",
                messages=[{"content": "what llm are you", "role": "user"}],
                allowed_openai_params=["tools", "response_format", "reasoning_effort"],
                response_format=test_response_format,
                reasoning_effort=test_reasoning_effort,
                tools=test_tools,
                client=client,
            )
        except Exception:
            pass  # We only care about the request body validation

        # Verify the API call was made
        mock_post.assert_called_once()

        # Get and parse the request body
        request_data = json.loads(mock_post.call_args.kwargs["data"])
        print(f"request_data: {request_data}")

        # Validate request contains our specified parameters and that the
        # control key itself was not forwarded to the provider.
        assert "allowed_openai_params" not in request_data
        assert request_data["response_format"] == test_response_format
        assert request_data["reasoning_effort"] == test_reasoning_effort
def test_cohere_embedding_outout_dimensions():
    """embed-v4.0 honors the `dimensions` parameter (512-wide vectors).

    NOTE(review): "outout" in the test name looks like a typo for "output";
    kept as-is so selection by test id is unaffected.
    """
    litellm._turn_on_debug()
    result = embedding(
        model="cohere/embed-v4.0",
        input="Hello, world!",
        dimensions=512,
    )
    print(f"response: {result}\n")
    assert len(result.data[0]["embedding"]) == 512
# Comprehensive Cohere Embed v4 tests
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cohere_embed_v4_basic_text(sync_mode):
    """Test basic text embedding functionality with Cohere Embed v4.

    Missing parametrize/asyncio marks added so `sync_mode` resolves and the
    coroutine is actually awaited.
    """
    try:
        data = {
            "model": "cohere/embed-v4.0",
            "input": ["Hello world!", "This is a test sentence."],
            "input_type": "search_document",
        }
        if sync_mode:
            response = embedding(**data)
        else:
            response = await litellm.aembedding(**data)

        # Validate response structure: one vector per input string.
        assert response.model is not None
        assert len(response.data) == 2
        assert response.data[0]["object"] == "embedding"
        assert len(response.data[0]["embedding"]) > 0
        assert response.usage.prompt_tokens > 0
        assert isinstance(response.usage, litellm.Usage)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cohere_embed_v4_with_dimensions(sync_mode):
    """Test Cohere Embed v4 with specific dimension parameter.

    Missing parametrize/asyncio marks added.
    """
    try:
        data = {
            "model": "cohere/embed-v4.0",
            "input": ["Test with custom dimensions"],
            "dimensions": 512,
            "input_type": "search_query",
        }
        if sync_mode:
            response = embedding(**data)
        else:
            response = await litellm.aembedding(**data)

        # Validate the requested output dimensionality is honored.
        assert len(response.data[0]["embedding"]) == 512
        assert isinstance(response.usage, litellm.Usage)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cohere_embed_v4_image_embedding(sync_mode):
    """Test Cohere Embed v4 image embedding functionality (multimodal).

    Missing parametrize/asyncio marks added.
    """
    try:
        import base64

        # 1x1 pixel red PNG (base64 encoded)
        test_image_data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS\xde\x00\x00\x00\tpHYs\x00\x00\x0b\x13\x00\x00\x0b\x13\x01\x00\x9a\x9c\x18\x00\x00\x00\x0cIDATx\x9cc\xf8\x00\x00\x00\x01\x00\x01\x00\x00\x00\x00'
        test_image_b64 = base64.b64encode(test_image_data).decode('utf-8')

        data = {
            "model": "cohere/embed-v4.0",
            "input": [test_image_b64],
            "input_type": "image",
        }
        if sync_mode:
            response = embedding(**data)
        else:
            response = await litellm.aembedding(**data)

        # Validate response structure for image embedding
        assert response.model is not None
        assert len(response.data) == 1
        assert response.data[0]["object"] == "embedding"
        assert len(response.data[0]["embedding"]) > 0
        assert isinstance(response.usage, litellm.Usage)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
@pytest.mark.parametrize(
    # Documented Cohere embedding input types for text inputs.
    "input_type",
    ["search_document", "search_query", "classification", "clustering"],
)
@pytest.mark.asyncio
async def test_cohere_embed_v4_input_types(input_type):
    """Test Cohere Embed v4 with different input types.

    `input_type` had no parametrize mark and the async test had no asyncio
    mark — both added so the test actually runs for each type.
    """
    try:
        response = await litellm.aembedding(
            model="cohere/embed-v4.0",
            input=[f"Test text for {input_type}"],
            input_type=input_type,
        )
        assert response.model is not None
        assert len(response.data) == 1
        assert response.data[0]["object"] == "embedding"
        assert len(response.data[0]["embedding"]) > 0
        assert isinstance(response.usage, litellm.Usage)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_cohere_embed_v4_encoding_format():
    """embed-v4.0 with encoding_format="float" yields float-valued vectors."""
    try:
        result = embedding(
            model="cohere/embed-v4.0",
            input=["Test encoding format"],
            encoding_format="float",
        )
        assert result.model is not None
        assert len(result.data) == 1
        first_item = result.data[0]
        assert first_item["object"] == "embedding"
        vector = first_item["embedding"]
        assert len(vector) > 0
        # Every component must be a Python float when encoding_format="float".
        assert all(isinstance(component, float) for component in vector)
        assert isinstance(result.usage, litellm.Usage)
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")
def test_cohere_embed_v4_error_handling():
    """Test error handling for Cohere Embed v4 with invalid inputs.

    Rewritten with idiomatic `pytest.raises` instead of the hand-rolled
    try/`pytest.fail` pattern — same contract: empty or None input must raise.
    """
    # Empty input list must raise rather than silently succeed.
    with pytest.raises(Exception):
        embedding(
            model="cohere/embed-v4.0",
            input=[],  # Empty input
        )

    # None input must raise as well.
    with pytest.raises(Exception):
        embedding(
            model="cohere/embed-v4.0",
            input=None,
        )
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cohere_embed_v4_multiple_texts(sync_mode):
    """Test Cohere Embed v4 with multiple text inputs.

    Missing parametrize/asyncio marks added.
    """
    try:
        texts = [
            "The quick brown fox jumps over the lazy dog",
            "Machine learning is transforming the world",
            "Python is a versatile programming language",
            "Natural language processing enables human-computer interaction",
        ]
        data = {
            "model": "cohere/embed-v4.0",
            "input": texts,
            "input_type": "search_document",
        }
        if sync_mode:
            response = embedding(**data)
        else:
            response = await litellm.aembedding(**data)

        # One embedding per input, indexed in order, all float-valued.
        assert response.model is not None
        assert len(response.data) == len(texts)
        for i, data_item in enumerate(response.data):
            assert data_item["object"] == "embedding"
            assert data_item["index"] == i
            assert len(data_item["embedding"]) > 0
            assert all(isinstance(x, float) for x in data_item["embedding"])
        assert isinstance(response.usage, litellm.Usage)
        assert response.usage.prompt_tokens > 0
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_cohere_embed_v4_with_optional_params():
    """embed-v4.0 with input_type, custom dimensions, and float encoding
    combined — the 256-wide float vector must come back intact."""
    try:
        result = embedding(
            model="cohere/embed-v4.0",
            input=["Test with optional parameters"],
            input_type="search_query",
            dimensions=256,
            encoding_format="float",
        )
        assert result.model is not None
        assert len(result.data) == 1
        entry = result.data[0]
        assert entry["object"] == "embedding"
        vector = entry["embedding"]
        assert len(vector) == 256  # Custom dimensions honored
        assert all(isinstance(value, float) for value in vector)
        assert isinstance(result.usage, litellm.Usage)
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")