Spaces:
Paused
Paused
| import os | |
| import sys | |
| import pytest | |
| from litellm.utils import supports_url_context | |
| sys.path.insert( | |
| 0, os.path.abspath("../..") | |
| ) # Adds the parent directory to the system paths | |
| from base_llm_unit_tests import BaseLLMChatTest | |
| from litellm.llms.vertex_ai.context_caching.transformation import ( | |
| separate_cached_messages, | |
| ) | |
| import litellm | |
| from litellm import completion | |
class TestGoogleAIStudioGemini(BaseLLMChatTest):
    """Gemini (Google AI Studio) specialisation of ``BaseLLMChatTest``.

    Supplies the model names the base class asks for and adds two
    Gemini-specific tests.
    """

    def get_base_completion_call_args(self) -> dict:
        """Return the completion kwargs for the default chat model."""
        return {"model": "gemini/gemini-2.0-flash"}

    def get_base_completion_call_args_with_reasoning_model(self) -> dict:
        """Return the completion kwargs for the reasoning model."""
        return {"model": "gemini/gemini-2.5-flash-preview-04-17"}

    def test_tool_call_no_arguments(self, tool_call_no_arguments):
        """Test that tool calls with no arguments is translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
        from litellm.litellm_core_utils.prompt_templates.factory import (
            convert_to_gemini_tool_call_invoke,
        )

        # No assertion on the value: the conversion only has to complete
        # without raising for the test to pass.
        result = convert_to_gemini_tool_call_invoke(tool_call_no_arguments)
        print(result)

    def test_url_context(self):
        """Send a request with the ``urlContext`` tool and expect URL context metadata.

        The test is skipped when ``supports_url_context`` reports that the
        base model does not support the feature.
        """
        # NOTE(review): presumably makes litellm read its local model cost map
        # rather than fetching one - confirm against litellm.get_model_cost_map.
        os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
        litellm.model_cost = litellm.get_model_cost_map(url="")
        litellm._turn_on_debug()

        call_args = self.get_base_completion_call_args()
        if not supports_url_context(call_args["model"], None):
            pytest.skip("Model does not support url context")

        response = self.completion_function(
            **call_args,
            messages=[{"role": "user", "content": "Summarize the content of this URL: https://en.wikipedia.org/wiki/Artificial_intelligence"}],
            tools=[{"urlContext": {}}],
        )
        assert response is not None

        # Use ``.get`` so a missing key fails with the assertion message below
        # instead of an uninformative KeyError.
        url_context_metadata = response.model_extra.get("vertex_ai_url_context_metadata")
        assert url_context_metadata is not None, "URL context metadata should be present"
        print(f"response={response}")
def test_gemini_context_caching_separate_messages():
    """Check that ``separate_cached_messages`` returns a non-empty cached and non-cached group.

    The conversation has a large system prompt, a user question, an assistant
    reply and a repeated user question; every turn except the assistant reply
    carries an ephemeral ``cache_control`` marker.
    """

    def cacheable_text(text):
        # Build a fresh content list on every call so no dict is shared
        # between messages.
        return [
            {
                "type": "text",
                "text": text,
                "cache_control": {"type": "ephemeral"},
            }
        ]

    question = "What are the key terms and conditions in this agreement?"

    # System Message
    system_turn = {
        "role": "system",
        "content": cacheable_text(
            "Here is the full text of a complex legal agreement" * 400
        ),
    }
    # marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
    first_user_turn = {"role": "user", "content": cacheable_text(question)}
    assistant_turn = {
        "role": "assistant",
        "content": "Certainly! the key terms and conditions are the following: the contract is 1 year long for $10/mo",
    }
    # The final turn is marked with cache-control, for continuing in followups.
    final_user_turn = {"role": "user", "content": cacheable_text(question)}

    conversation = [system_turn, first_user_turn, assistant_turn, final_user_turn]

    cached, uncached = separate_cached_messages(conversation)
    print(cached)
    print(uncached)

    assert len(cached) > 0, "Cached messages should be present"
    assert len(uncached) > 0, "Non-cached messages should be present"
def test_gemini_image_generation():
    """Request image and text modalities from the image-generation model and require message content."""
    # litellm._turn_on_debug()
    request_kwargs = {
        "model": "gemini/gemini-2.0-flash-exp-image-generation",
        "messages": [{"role": "user", "content": "Generate an image of a cat"}],
        "modalities": ["image", "text"],
    }
    response = completion(**request_kwargs)

    first_message = response.choices[0].message
    assert first_message.content is not None
def test_gemini_thinking():
    """Check that an assistant turn's ``reasoning_content`` reaches the Gemini request.

    The raw request built by ``return_raw_request`` must contain the reasoning
    text; afterwards a real ``completion`` call is made with the same messages
    and must return content.
    """
    litellm._turn_on_debug()
    import json

    from litellm.types.utils import CallTypes, Message
    from litellm.utils import return_raw_request

    model = "gemini/gemini-2.5-flash-preview-04-17"
    reasoning_content = "I'm thinking about Occam's Razor."

    user_turn = {
        "role": "user",
        "content": "Explain the concept of Occam's Razor and provide a simple, everyday example",
    }
    assistant_turn = Message(
        content="Okay, let's break down Occam's Razor.",
        reasoning_content=reasoning_content,
        role="assistant",
        tool_calls=None,
        function_call=None,
        provider_specific_fields=None,
    )
    messages = [user_turn, assistant_turn]

    raw_request = return_raw_request(
        endpoint=CallTypes.completion,
        kwargs={"model": model, "messages": messages},
    )
    assert reasoning_content in json.dumps(raw_request)

    # make sure call works
    response = completion(model=model, messages=messages)
    reply = response.choices[0].message
    print(reply)
    assert reply.content is not None
def test_gemini_thinking_budget_0():
    """Check that a zero thinking budget is forwarded in the raw Gemini request body.

    Only the request produced by ``return_raw_request`` is inspected; this
    test makes no ``completion`` call.
    """
    litellm._turn_on_debug()
    import json

    from litellm.types.utils import CallTypes
    from litellm.utils import return_raw_request

    raw_request = return_raw_request(
        endpoint=CallTypes.completion,
        kwargs={
            "model": "gemini/gemini-2.5-flash-preview-04-17",
            "messages": [{"role": "user", "content": "Explain the concept of Occam's Razor and provide a simple, everyday example"}],
            "thinking": {"type": "enabled", "budget_tokens": 0},
        },
    )
    print(raw_request)

    serialized_body = json.dumps(raw_request["raw_request_body"])
    # A bare `"0" in body` check is satisfied by any zero digit anywhere in
    # the payload, so pin the Gemini thinking-budget field itself.
    # json.dumps uses ": " as its default key/value separator.
    assert '"thinkingBudget": 0' in serialized_body, (
        f"expected thinkingBudget of 0 in request body, got: {serialized_body}"
    )
def test_gemini_finish_reason():
    """Cap the completion at two tokens and expect a ``"length"`` finish reason."""
    import os
    from litellm import completion

    litellm._turn_on_debug()

    response = completion(
        model="gemini/gemini-1.5-pro",
        messages=[{"role": "user", "content": "give me 3 random words"}],
        max_tokens=2,
    )
    print(response)

    first_choice = response.choices[0]
    assert first_choice.finish_reason is not None
    assert first_choice.finish_reason == "length"
def test_gemini_url_context():
    """Ask Gemini to summarise a URL with the ``urlContext`` tool and check the retrieval metadata.

    The first ``urlMetadata`` entry must report the requested URL and a
    successful retrieval status.
    """
    from litellm import completion

    litellm._turn_on_debug()

    url = "https://ai.google.dev/gemini-api/docs/models"
    prompt = f"\nSummarize this document:\n{url}\n"

    response = completion(
        model="gemini/gemini-2.0-flash",
        messages=[{"role": "user", "content": prompt}],
        tools=[{"urlContext": {}}],
    )
    print(response)

    assert response.choices[0].message.content is not None

    metadata = response.model_extra["vertex_ai_url_context_metadata"]
    assert metadata is not None

    # Only the first metadata entry of the first group is checked.
    first_entry = metadata[0]["urlMetadata"][0]
    assert first_entry["retrievedUrl"] == url
    assert first_entry["urlRetrievalStatus"] == "URL_RETRIEVAL_STATUS_SUCCESS"
def test_gemini_with_grounding():
    """Stream a completion with the ``googleSearch`` tool and check web search usage is reported.

    The streamed chunks are rebuilt into one response with
    ``stream_chunk_builder`` and its usage must show at least one web search
    request.
    """
    from litellm import completion, Usage, stream_chunk_builder

    litellm._turn_on_debug()
    litellm.set_verbose = True

    tools = [{"googleSearch": {}}]

    # response = completion(model="gemini/gemini-2.0-flash", messages=[{"role": "user", "content": "What is the capital of France?"}], tools=tools)
    # print(response)
    # usage: Usage = response.usage
    # assert usage.prompt_tokens_details.web_search_requests is not None
    # assert usage.prompt_tokens_details.web_search_requests > 0

    ## Check streaming
    stream = completion(
        model="gemini/gemini-2.0-flash",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        tools=tools,
        stream=True,
        stream_options={"include_usage": True},
    )
    # Drain the whole stream before rebuilding the response.
    chunks = list(stream)
    print(f"chunks before stream_chunk_builder: {chunks}")
    assert len(chunks) > 0

    complete_response = stream_chunk_builder(chunks)
    print(complete_response)
    assert complete_response is not None

    usage: Usage = complete_response.usage
    search_count = usage.prompt_tokens_details.web_search_requests
    assert search_count is not None
    assert search_count > 0
def test_gemini_with_empty_function_call_arguments():
    """Call Gemini with a function tool whose ``parameters`` is an empty string and require message content."""
    from litellm import completion

    litellm._turn_on_debug()

    # The tool's "parameters" is an empty string rather than a schema dict.
    weather_function = {"name": "get_current_weather", "parameters": ""}
    tools = [{"type": "function", "function": weather_function}]

    response = completion(
        model="gemini/gemini-2.0-flash",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        tools=tools,
    )
    print(response)

    first_message = response.choices[0].message
    assert first_message.content is not None