from abc import ABC, abstractmethod
from litellm.caching import LiteLLMCacheType
import os
import sys
import time
import traceback
import uuid
from dotenv import load_dotenv

load_dotenv()
sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import asyncio
import hashlib
import random

import pytest

import litellm
from litellm.caching import Cache
from litellm import completion, embedding


class LLMCachingUnitTests(ABC):
    @abstractmethod
    def get_cache_type(self) -> LiteLLMCacheType:
        pass

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    async def test_cache_completion(self, sync_mode):
        litellm._turn_on_debug()
        random_number = random.randint(
            1, 100000
        )  # add a random number to ensure it's always adding / reading from cache
        messages = [
            {
                "role": "user",
                "content": f"write a one sentence poem about: {random_number}",
            }
        ]
        cache_type = self.get_cache_type()
        litellm.cache = Cache(
            type=cache_type,
        )

        if sync_mode:
            response1 = completion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is so great!",
            )
        else:
            response1 = await litellm.acompletion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is so great!",
            )
        # response2 is mocked to a different response from response1,
        # but the completion from the cache should be used instead of the mock
        # response since the input is the same as response1
        await asyncio.sleep(0.5)

        if sync_mode:
            response2 = completion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is great!",
            )
        else:
            response2 = await litellm.acompletion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is great!",
            )
        if (
            response1["choices"][0]["message"]["content"]
            != response2["choices"][0]["message"]["content"]
        ):  # 1 and 2 should be the same
            # 1 & 2 have the exact same input params. This MUST be a CACHE HIT
            print(f"response1: {response1}")
            print(f"response2: {response2}")
            pytest.fail(
                f"Error occurred: response1 - {response1['choices'][0]['message']['content']} != response2 - {response2['choices'][0]['message']['content']}"
            )
        # Since the parameters are not the same as response1, response3 should actually
        # be the mock response
        if sync_mode:
            response3 = completion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                temperature=0.5,
                mock_response="This number is awful!",
            )
        else:
            response3 = await litellm.acompletion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                temperature=0.5,
                mock_response="This number is awful!",
            )

        print("\nresponse 1", response1)
        print("\nresponse 2", response2)
        print("\nresponse 3", response3)
        # print("\nresponse 4", response4)

        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []
        # 1 & 2 should be exactly the same
        # 1 & 3 should be different, since input params are diff
        if (
            response1["choices"][0]["message"]["content"]
            == response3["choices"][0]["message"]["content"]
        ):
            # if input params like max_tokens, temperature are diff it should NOT be a cache hit
            print(f"response1: {response1}")
            print(f"response3: {response3}")
            pytest.fail(
                "Error occurred: response 1 == response 3. Same model with different"
                " params should not be a cache hit."
            )

        assert response1.id == response2.id
        assert response1.created == response2.created
        assert (
            response1.choices[0].message.content
            == response2.choices[0].message.content
        )

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    async def test_disk_cache_embedding(self, sync_mode):
        litellm._turn_on_debug()
        random_number = random.randint(
            1, 100000
        )  # add a random number to ensure it's always adding / reading from cache
        input = [f"hello {random_number}"]
        litellm.cache = Cache(
            type="disk",
        )

        if sync_mode:
            response1 = embedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
            )
        else:
            response1 = await litellm.aembedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
            )
        # response2 should be served from the cache, since the input is the
        # same as response1
        await asyncio.sleep(0.5)

        if sync_mode:
            response2 = embedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
            )
        else:
            response2 = await litellm.aembedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
            )

        if response2._hidden_params["cache_hit"] is not True:
            pytest.fail("Cache hit should be True")
        # Since the `user` param differs from response1, response3 should not be
        # served from the cache
        if sync_mode:
            response3 = embedding(
                "openai/text-embedding-ada-002",
                input=input,
                user="charlie",
                caching=True,
            )
        else:
            response3 = await litellm.aembedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
                user="charlie",
            )

        print("\nresponse 1", response1)
        print("\nresponse 2", response2)
        print("\nresponse 3", response3)
        # print("\nresponse 4", response4)

        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []

        # 1 & 2 should be exactly the same
        # 1 & 3 should be different, since input params are diff
        if response3._hidden_params.get("cache_hit") is True:
            pytest.fail("Cache hit should not be True")
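

# Example usage: a minimal sketch (not part of the original module) showing how a
# concrete test class would plug into LLMCachingUnitTests. The class name below is
# hypothetical and deliberately does not start with "Test", so pytest will not
# collect it from this helper module; a real test file would use a "Test*" name.
# This assumes LiteLLMCacheType exposes a DISK member corresponding to the "disk"
# string used in test_disk_cache_embedding above.
class _ExampleDiskCachingUnitTests(LLMCachingUnitTests):
    def get_cache_type(self) -> LiteLLMCacheType:
        # Run the inherited caching tests against the on-disk cache backend.
        return LiteLLMCacheType.DISK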