import sys, os, uuid
import time
import traceback

from dotenv import load_dotenv

load_dotenv()
sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import pytest
import litellm
from litellm import embedding, completion
from litellm.caching import Cache
import random
import string
import hashlib

# litellm.set_verbose=True

messages = [{"role": "user", "content": "who is ishaan Github? "}]
# comment


def generate_random_word(length=4):
    letters = string.ascii_lowercase
    return "".join(random.choice(letters) for _ in range(length))


messages = [{"role": "user", "content": "who is ishaan 5222"}]
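# The tests below exercise litellm's response caching. A minimal usage sketch
# (assumes OPENAI_API_KEY is set; kept commented out so it is not run at import time):
#   litellm.cache = Cache()  # in-memory cache
#   first = completion(model="gpt-3.5-turbo", messages=messages, caching=True)   # network call, result stored
#   second = completion(model="gpt-3.5-turbo", messages=messages, caching=True)  # identical params -> cache hit
#   litellm.cache = None  # turn caching back off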
def test_caching_v2():  # test in memory cache
    try:
        litellm.set_verbose = True
        litellm.cache = Cache()
        response1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
        response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
        print(f"response1: {response1}")
        print(f"response2: {response2}")
        litellm.cache = None  # disable cache
        litellm.success_callback = []
        litellm._async_success_callback = []
        if (
            response2["choices"][0]["message"]["content"]
            != response1["choices"][0]["message"]["content"]
        ):
            print(f"response1: {response1}")
            print(f"response2: {response2}")
            pytest.fail("Error occurred: response2 was not served from the in-memory cache")
    except Exception as e:
        print(f"error occurred: {traceback.format_exc()}")
        pytest.fail(f"Error occurred: {e}")


# test_caching_v2()
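# test_caching_with_ttl: a ttl of 0 should expire the cached entry immediately,
# so the second (otherwise identical) call must NOT return the cached content.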
def test_caching_with_ttl():
    try:
        litellm.set_verbose = True
        litellm.cache = Cache()
        response1 = completion(
            model="gpt-3.5-turbo", messages=messages, caching=True, ttl=0
        )
        response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
        print(f"response1: {response1}")
        print(f"response2: {response2}")
        litellm.cache = None  # disable cache
        litellm.success_callback = []
        litellm._async_success_callback = []
        assert (
            response2["choices"][0]["message"]["content"]
            != response1["choices"][0]["message"]["content"]
        )
    except Exception as e:
        print(f"error occurred: {traceback.format_exc()}")
        pytest.fail(f"Error occurred: {e}")
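# test_caching_with_cache_controls: per-request cache controls are passed via the
# `cache` kwarg - with `{"ttl": 0}` the stored entry should not be reusable, while a
# `{"ttl": 5}` write followed by a `{"s-maxage": 5}` read should produce a cache hit.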
def test_caching_with_cache_controls():
    try:
        litellm.set_verbose = True
        litellm.cache = Cache()
        messages = [{"role": "user", "content": f"Hey, how's it going? {uuid.uuid4()}"}]
        ## TTL = 0
        response1 = completion(
            model="gpt-3.5-turbo", messages=messages, cache={"ttl": 0}
        )
        response2 = completion(
            model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 10}
        )
        print(f"response1: {response1}")
        print(f"response2: {response2}")
        assert (
            response2["choices"][0]["message"]["content"]
            != response1["choices"][0]["message"]["content"]
        )
        messages = [{"role": "user", "content": f"Hey, how's it going? {uuid.uuid4()}"}]
        ## TTL = 5
        response1 = completion(
            model="gpt-3.5-turbo", messages=messages, cache={"ttl": 5}
        )
        response2 = completion(
            model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 5}
        )
        print(f"response1: {response1}")
        print(f"response2: {response2}")
        assert (
            response2["choices"][0]["message"]["content"]
            == response1["choices"][0]["message"]["content"]
        )
    except Exception as e:
        print(f"error occurred: {traceback.format_exc()}")
        pytest.fail(f"Error occurred: {e}")


# test_caching_with_cache_controls()
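# test_caching_with_models_v2: the model name is part of the cache key, so the same
# prompt sent to gpt-3.5-turbo and command-nightly must not share a cache entry,
# while two identical gpt-3.5-turbo calls must.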
def test_caching_with_models_v2():
    messages = [
        {"role": "user", "content": "who is ishaan CTO of litellm from litellm 2023"}
    ]
    litellm.cache = Cache()
    print("test2 for caching")
    response1 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
    response2 = completion(model="gpt-3.5-turbo", messages=messages, caching=True)
    response3 = completion(model="command-nightly", messages=messages, caching=True)
    print(f"response1: {response1}")
    print(f"response2: {response2}")
    print(f"response3: {response3}")
    litellm.cache = None
    litellm.success_callback = []
    litellm._async_success_callback = []
    if (
        response3["choices"][0]["message"]["content"]
        == response2["choices"][0]["message"]["content"]
    ):
        # if models are different, it should not return a cached response
        print(f"response2: {response2}")
        print(f"response3: {response3}")
        pytest.fail("Error occurred: different models should not share a cache entry")
    if (
        response1["choices"][0]["message"]["content"]
        != response2["choices"][0]["message"]["content"]
    ):
        print(f"response1: {response1}")
        print(f"response2: {response2}")
        pytest.fail("Error occurred: identical calls should return the cached response")


# test_caching_with_models_v2()

embedding_large_text = (
    """
small text
"""
    * 5
)


# # test_caching_with_models()
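# test_embedding_caching: the second identical embedding() call should be served from
# the in-memory cache - asserted indirectly by requiring it to return in under 0.1s
# and to contain the exact same embedding vector.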
def test_embedding_caching():
    import time

    # litellm.set_verbose = True
    litellm.cache = Cache()
    text_to_embed = [embedding_large_text]
    start_time = time.time()
    embedding1 = embedding(
        model="text-embedding-ada-002", input=text_to_embed, caching=True
    )
    end_time = time.time()
    print(f"Embedding 1 response time: {end_time - start_time} seconds")

    time.sleep(1)
    start_time = time.time()
    embedding2 = embedding(
        model="text-embedding-ada-002", input=text_to_embed, caching=True
    )
    end_time = time.time()
    # print(f"embedding2: {embedding2}")
    print(f"Embedding 2 response time: {end_time - start_time} seconds")

    litellm.cache = None
    litellm.success_callback = []
    litellm._async_success_callback = []
    assert end_time - start_time <= 0.1  # ensure the 2nd (cached) response comes back in under 0.1s
    if embedding2["data"][0]["embedding"] != embedding1["data"][0]["embedding"]:
        print(f"embedding1: {embedding1}")
        print(f"embedding2: {embedding2}")
        pytest.fail("Error occurred: Embedding caching failed")


# test_embedding_caching()
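# test_embedding_caching_azure: the Azure credentials are read from the environment,
# then the env vars are blanked out so the calls can only succeed via the explicit
# api_key / api_base / api_version arguments; the env vars are restored at the end.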
def test_embedding_caching_azure():
    print("Testing azure embedding caching")
    import time

    litellm.cache = Cache()
    text_to_embed = [embedding_large_text]

    api_key = os.environ["AZURE_API_KEY"]
    api_base = os.environ["AZURE_API_BASE"]
    api_version = os.environ["AZURE_API_VERSION"]

    os.environ["AZURE_API_VERSION"] = ""
    os.environ["AZURE_API_BASE"] = ""
    os.environ["AZURE_API_KEY"] = ""

    start_time = time.time()
    print("AZURE CONFIGS")
    print(api_version)
    print(api_key)
    print(api_base)
    embedding1 = embedding(
        model="azure/azure-embedding-model",
        input=["good morning from litellm", "this is another item"],
        api_key=api_key,
        api_base=api_base,
        api_version=api_version,
        caching=True,
    )
    end_time = time.time()
    print(f"Embedding 1 response time: {end_time - start_time} seconds")

    time.sleep(1)
    start_time = time.time()
    embedding2 = embedding(
        model="azure/azure-embedding-model",
        input=["good morning from litellm", "this is another item"],
        api_key=api_key,
        api_base=api_base,
        api_version=api_version,
        caching=True,
    )
    end_time = time.time()
    print(f"Embedding 2 response time: {end_time - start_time} seconds")

    litellm.cache = None
    litellm.success_callback = []
    litellm._async_success_callback = []
    assert end_time - start_time <= 0.1  # ensure the 2nd (cached) response comes back in under 0.1s
    if embedding2["data"][0]["embedding"] != embedding1["data"][0]["embedding"]:
        print(f"embedding1: {embedding1}")
        print(f"embedding2: {embedding2}")
        pytest.fail("Error occurred: Embedding caching failed")

    # restore the Azure env vars cleared above
    os.environ["AZURE_API_VERSION"] = api_version
    os.environ["AZURE_API_BASE"] = api_base
    os.environ["AZURE_API_KEY"] = api_key


# test_embedding_caching_azure()
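# test_redis_cache_completion: same flow as the in-memory tests, but backed by Redis.
# Expectations: responses 1 & 2 (identical params) are the same cached object,
# 1 & 3 differ (different params), 1 & 4 differ (different model).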
def test_redis_cache_completion():
    litellm.set_verbose = False

    random_number = random.randint(
        1, 100000
    )  # add a random number to ensure it's always adding / reading from cache
    messages = [
        {"role": "user", "content": f"write a one sentence poem about: {random_number}"}
    ]
    litellm.cache = Cache(
        type="redis",
        host=os.environ["REDIS_HOST"],
        port=os.environ["REDIS_PORT"],
        password=os.environ["REDIS_PASSWORD"],
    )
    print("test2 for Redis Caching - non streaming")
    response1 = completion(
        model="gpt-3.5-turbo", messages=messages, caching=True, max_tokens=20
    )
    response2 = completion(
        model="gpt-3.5-turbo", messages=messages, caching=True, max_tokens=20
    )
    response3 = completion(
        model="gpt-3.5-turbo", messages=messages, caching=True, temperature=0.5
    )
    response4 = completion(model="command-nightly", messages=messages, caching=True)

    print("\nresponse 1", response1)
    print("\nresponse 2", response2)
    print("\nresponse 3", response3)
    print("\nresponse 4", response4)
    litellm.cache = None
    litellm.success_callback = []
    litellm._async_success_callback = []

    """
    1 & 2 should be exactly the same
    1 & 3 should be different, since input params are diff
    1 & 4 should be diff, since models are diff
    """
    if (
        response1["choices"][0]["message"]["content"]
        != response2["choices"][0]["message"]["content"]
    ):  # 1 and 2 should be the same
        # 1 & 2 have the exact same input params. This MUST be a cache hit
        print(f"response1: {response1}")
        print(f"response2: {response2}")
        pytest.fail("Error occurred: identical params should be a cache hit (response 1 != response 2)")
    if (
        response1["choices"][0]["message"]["content"]
        == response3["choices"][0]["message"]["content"]
    ):
        # if input params like seed, max_tokens are diff it should NOT be a cache hit
        print(f"response1: {response1}")
        print(f"response3: {response3}")
        pytest.fail(
            "Response 1 == response 3. Same model with different params should not hit the cache."
        )
    if (
        response1["choices"][0]["message"]["content"]
        == response4["choices"][0]["message"]["content"]
    ):
        # if models are different, it should not return a cached response
        print(f"response1: {response1}")
        print(f"response4: {response4}")
        pytest.fail("Error occurred: different models should not share a cache entry")

    assert response1.id == response2.id
    assert response1.created == response2.created
    assert response1.choices[0].message.content == response2.choices[0].message.content


# test_redis_cache_completion()
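# test_redis_cache_completion_stream: with stream=True the chunks are accumulated into
# a full string; the second call should replay the cached stream, so the assembled
# contents must match.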
def test_redis_cache_completion_stream():
    try:
        litellm.success_callback = []
        litellm._async_success_callback = []
        litellm.callbacks = []
        litellm.set_verbose = True
        random_number = random.randint(
            1, 100000
        )  # add a random number to ensure it's always adding / reading from cache
        messages = [
            {
                "role": "user",
                "content": f"write a one sentence poem about: {random_number}",
            }
        ]
        litellm.cache = Cache(
            type="redis",
            host=os.environ["REDIS_HOST"],
            port=os.environ["REDIS_PORT"],
            password=os.environ["REDIS_PASSWORD"],
        )
        print("test for caching, streaming + completion")
        response1 = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=40,
            temperature=0.2,
            stream=True,
        )
        response_1_content = ""
        for chunk in response1:
            print(chunk)
            response_1_content += chunk.choices[0].delta.content or ""
        print(response_1_content)

        time.sleep(0.5)
        response2 = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=40,
            temperature=0.2,
            stream=True,
        )
        response_2_content = ""
        for chunk in response2:
            print(chunk)
            response_2_content += chunk.choices[0].delta.content or ""
        print("\nresponse 1", response_1_content)
        print("\nresponse 2", response_2_content)
        assert (
            response_1_content == response_2_content
        ), f"Response 1 != Response 2. Same params, Response 1: {response_1_content} != Response 2: {response_2_content}"
        litellm.success_callback = []
        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []
    except Exception as e:
        print(e)
        litellm.success_callback = []
        raise e
    """
    1 & 2 should be exactly the same
    """


test_redis_cache_completion_stream()
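# test_redis_cache_acompletion_stream: async variant of the streaming test - each call
# runs in its own asyncio.run() and writes into a nonlocal accumulator, and the second
# streamed response is expected to come from the Redis cache.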
def test_redis_cache_acompletion_stream():
    import asyncio

    try:
        litellm.set_verbose = True
        random_word = generate_random_word()
        messages = [
            {
                "role": "user",
                "content": f"write a one sentence poem about: {random_word}",
            }
        ]
        litellm.cache = Cache(
            type="redis",
            host=os.environ["REDIS_HOST"],
            port=os.environ["REDIS_PORT"],
            password=os.environ["REDIS_PASSWORD"],
        )
        print("test for caching, streaming + completion")
        response_1_content = ""
        response_2_content = ""

        async def call1():
            nonlocal response_1_content
            response1 = await litellm.acompletion(
                model="gpt-3.5-turbo",
                messages=messages,
                max_tokens=40,
                temperature=1,
                stream=True,
            )
            async for chunk in response1:
                print(chunk)
                response_1_content += chunk.choices[0].delta.content or ""
            print(response_1_content)

        asyncio.run(call1())
        time.sleep(0.5)
        print("\n\n Response 1 content: ", response_1_content, "\n\n")

        async def call2():
            nonlocal response_2_content
            response2 = await litellm.acompletion(
                model="gpt-3.5-turbo",
                messages=messages,
                max_tokens=40,
                temperature=1,
                stream=True,
            )
            async for chunk in response2:
                print(chunk)
                response_2_content += chunk.choices[0].delta.content or ""
            print(response_2_content)

        asyncio.run(call2())
        print("\nresponse 1", response_1_content)
        print("\nresponse 2", response_2_content)
        assert (
            response_1_content == response_2_content
        ), f"Response 1 != Response 2. Same params, Response 1: {response_1_content} != Response 2: {response_2_content}"
        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []
    except Exception as e:
        print(e)
        raise e


# test_redis_cache_acompletion_stream()
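# test_redis_cache_acompletion_stream_bedrock: same async streaming check as above,
# but against bedrock/anthropic.claude-v1 to confirm caching is provider-agnostic.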
def test_redis_cache_acompletion_stream_bedrock():
    import asyncio

    try:
        litellm.set_verbose = True
        random_word = generate_random_word()
        messages = [
            {
                "role": "user",
                "content": f"write a one sentence poem about: {random_word}",
            }
        ]
        litellm.cache = Cache(
            type="redis",
            host=os.environ["REDIS_HOST"],
            port=os.environ["REDIS_PORT"],
            password=os.environ["REDIS_PASSWORD"],
        )
        print("test for caching, streaming + completion")
        response_1_content = ""
        response_2_content = ""

        async def call1():
            nonlocal response_1_content
            response1 = await litellm.acompletion(
                model="bedrock/anthropic.claude-v1",
                messages=messages,
                max_tokens=40,
                temperature=1,
                stream=True,
            )
            async for chunk in response1:
                print(chunk)
                response_1_content += chunk.choices[0].delta.content or ""
            print(response_1_content)

        asyncio.run(call1())
        time.sleep(0.5)
        print("\n\n Response 1 content: ", response_1_content, "\n\n")

        async def call2():
            nonlocal response_2_content
            response2 = await litellm.acompletion(
                model="bedrock/anthropic.claude-v1",
                messages=messages,
                max_tokens=40,
                temperature=1,
                stream=True,
            )
            async for chunk in response2:
                print(chunk)
                response_2_content += chunk.choices[0].delta.content or ""
            print(response_2_content)

        asyncio.run(call2())
        print("\nresponse 1", response_1_content)
        print("\nresponse 2", response_2_content)
        assert (
            response_1_content == response_2_content
        ), f"Response 1 != Response 2. Same params, Response 1: {response_1_content} != Response 2: {response_2_content}"
        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []
    except Exception as e:
        print(e)
        raise e
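# test_s3_cache_acompletion_stream_azure: uses an S3 bucket as the cache backend for an
# Azure streaming call; the `created` timestamp comparison is left commented out below.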
def test_s3_cache_acompletion_stream_azure():
    import asyncio

    try:
        litellm.set_verbose = True
        random_word = generate_random_word()
        messages = [
            {
                "role": "user",
                "content": f"write a one sentence poem about: {random_word}",
            }
        ]
        litellm.cache = Cache(
            type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
        )
        print("s3 Cache: test for caching, streaming + completion")
        response_1_content = ""
        response_2_content = ""

        response_1_created = ""
        response_2_created = ""

        async def call1():
            nonlocal response_1_content, response_1_created
            response1 = await litellm.acompletion(
                model="azure/chatgpt-v-2",
                messages=messages,
                max_tokens=40,
                temperature=1,
                stream=True,
            )
            async for chunk in response1:
                print(chunk)
                response_1_created = chunk.created
                response_1_content += chunk.choices[0].delta.content or ""
            print(response_1_content)

        asyncio.run(call1())
        time.sleep(0.5)
        print("\n\n Response 1 content: ", response_1_content, "\n\n")

        async def call2():
            nonlocal response_2_content, response_2_created
            response2 = await litellm.acompletion(
                model="azure/chatgpt-v-2",
                messages=messages,
                max_tokens=40,
                temperature=1,
                stream=True,
            )
            async for chunk in response2:
                print(chunk)
                response_2_content += chunk.choices[0].delta.content or ""
                response_2_created = chunk.created
            print(response_2_content)

        asyncio.run(call2())
        print("\nresponse 1", response_1_content)
        print("\nresponse 2", response_2_content)

        assert (
            response_1_content == response_2_content
        ), f"Response 1 != Response 2. Same params, Response 1: {response_1_content} != Response 2: {response_2_content}"

        # prioritizing getting a new deploy out - will look at this in the next deploy
        # print("response 1 created", response_1_created)
        # print("response 2 created", response_2_created)
        # assert response_1_created == response_2_created

        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []
    except Exception as e:
        print(e)
        raise e


# test_s3_cache_acompletion_stream_azure()
# test_redis_cache_acompletion_stream_bedrock()
# redis cache with custom keys
def custom_get_cache_key(*args, **kwargs):
    # return the key to use for the cache
    key = (
        kwargs.get("model", "")
        + str(kwargs.get("messages", ""))
        + str(kwargs.get("temperature", ""))
        + str(kwargs.get("logit_bias", ""))
    )
    return key
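# For illustration, with the function above a call like
#   completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hi"}], temperature=1)
# would be keyed as:
#   "gpt-3.5-turbo[{'role': 'user', 'content': 'hi'}]1"
# (logit_bias is absent, so it contributes an empty string).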
def test_custom_redis_cache_with_key():
    messages = [{"role": "user", "content": "write a one line story"}]
    litellm.cache = Cache(
        type="redis",
        host=os.environ["REDIS_HOST"],
        port=os.environ["REDIS_PORT"],
        password=os.environ["REDIS_PASSWORD"],
    )
    litellm.cache.get_cache_key = custom_get_cache_key

    local_cache = {}

    def set_cache(key, value):
        local_cache[key] = value

    def get_cache(key):
        if key in local_cache:
            return local_cache[key]

    # patch the redis cache's get and set calls with the local dict above
    litellm.cache.cache.set_cache = set_cache
    litellm.cache.cache.get_cache = get_cache

    response1 = completion(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=1,
        caching=True,
        num_retries=3,
    )
    response2 = completion(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=1,
        caching=True,
        num_retries=3,
    )
    response3 = completion(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=1,
        caching=False,
        num_retries=3,
    )
    print(f"response1: {response1}")
    print(f"response2: {response2}")
    print(f"response3: {response3}")

    if (
        response3["choices"][0]["message"]["content"]
        == response2["choices"][0]["message"]["content"]
    ):
        pytest.fail("Error occurred: response3 matched the cache even though caching=False")
    litellm.cache = None
    litellm.success_callback = []
    litellm._async_success_callback = []


# test_custom_redis_cache_with_key()
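# test_cache_override: passing caching=False on the request should bypass the global
# litellm.cache, so the second embedding call must hit the network (take > 0.1s).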
def test_cache_override():
    # test if we can override the cache when `caching=False` but litellm.cache = Cache() is set
    # in this case it should not return cached responses
    litellm.cache = Cache()
    print("Testing cache override")
    litellm.set_verbose = True

    # test embedding
    response1 = embedding(
        model="text-embedding-ada-002", input=["hello who are you"], caching=False
    )

    start_time = time.time()

    response2 = embedding(
        model="text-embedding-ada-002", input=["hello who are you"], caching=False
    )

    end_time = time.time()
    print(f"Embedding 2 response time: {end_time - start_time} seconds")

    assert (
        end_time - start_time > 0.1
    )  # ensure the 2nd response takes over 0.1s, i.e. it was not served from the cache


# test_cache_override()
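# test_custom_redis_cache_params: extra kwargs (db, ssl, cert/key paths) should be
# forwarded to the underlying Redis client; the referenced ./redis_* cert, key, and CA
# files are assumed to exist in the working directory.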
def test_custom_redis_cache_params():
    # test if we can init redis with **kwargs
    try:
        litellm.cache = Cache(
            type="redis",
            host=os.environ["REDIS_HOST"],
            port=os.environ["REDIS_PORT"],
            password=os.environ["REDIS_PASSWORD"],
            db=0,
            ssl=True,
            ssl_certfile="./redis_user.crt",
            ssl_keyfile="./redis_user_private.key",
            ssl_ca_certs="./redis_ca.pem",
        )
        print(litellm.cache.cache.redis_client)
        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
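# test_get_cache_key: the cache key is the sha256 hex digest of a canonical
# "param: value" string built from the call's relevant kwargs (model, messages,
# temperature, max_tokens for completions; model/model_group and input for embeddings),
# while litellm-internal kwargs such as litellm_call_id are excluded from the key.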
def test_get_cache_key():
    from litellm.caching import Cache

    try:
        print("Testing get_cache_key")
        cache_instance = Cache()
        cache_key = cache_instance.get_cache_key(
            **{
                "model": "gpt-3.5-turbo",
                "messages": [
                    {"role": "user", "content": "write a one sentence poem about: 7510"}
                ],
                "max_tokens": 40,
                "temperature": 0.2,
                "stream": True,
                "litellm_call_id": "ffe75e7e-8a07-431f-9a74-71a5b9f35f0b",
                "litellm_logging_obj": {},
            }
        )
        cache_key_2 = cache_instance.get_cache_key(
            **{
                "model": "gpt-3.5-turbo",
                "messages": [
                    {"role": "user", "content": "write a one sentence poem about: 7510"}
                ],
                "max_tokens": 40,
                "temperature": 0.2,
                "stream": True,
                "litellm_call_id": "ffe75e7e-8a07-431f-9a74-71a5b9f35f0b",
                "litellm_logging_obj": {},
            }
        )
        cache_key_str = "model: gpt-3.5-turbomessages: [{'role': 'user', 'content': 'write a one sentence poem about: 7510'}]temperature: 0.2max_tokens: 40"
        hash_object = hashlib.sha256(cache_key_str.encode())
        # Hexadecimal representation of the hash
        hash_hex = hash_object.hexdigest()
        assert cache_key == hash_hex
        assert (
            cache_key_2 == hash_hex
        ), f"{cache_key} != {cache_key_2}. The same kwargs should have the same cache key across runs"

        embedding_cache_key = cache_instance.get_cache_key(
            **{
                "model": "azure/azure-embedding-model",
                "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                "api_key": "",
                "api_version": "2023-07-01-preview",
                "timeout": None,
                "max_retries": 0,
                "input": ["hi who is ishaan"],
                "caching": True,
                "client": "<openai.lib.azure.AsyncAzureOpenAI object at 0x12b6a1060>",
            }
        )

        print(embedding_cache_key)

        embedding_cache_key_str = (
            "model: azure/azure-embedding-modelinput: ['hi who is ishaan']"
        )
        hash_object = hashlib.sha256(embedding_cache_key_str.encode())
        # Hexadecimal representation of the hash
        hash_hex = hash_object.hexdigest()
        assert (
            embedding_cache_key == hash_hex
        ), f"{embedding_cache_key} != 'model: azure/azure-embedding-modelinput: ['hi who is ishaan']'. The same kwargs should have the same cache key across runs"

        # Proxy - embedding cache: test that the embedding key uses model_group, not model
        embedding_cache_key_2 = cache_instance.get_cache_key(
            **{
                "model": "azure/azure-embedding-model",
                "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
                "api_key": "",
                "api_version": "2023-07-01-preview",
                "timeout": None,
                "max_retries": 0,
                "input": ["hi who is ishaan"],
                "caching": True,
                "client": "<openai.lib.azure.AsyncAzureOpenAI object at 0x12b6a1060>",
                "proxy_server_request": {
                    "url": "http://0.0.0.0:8000/embeddings",
                    "method": "POST",
                    "headers": {
                        "host": "0.0.0.0:8000",
                        "user-agent": "curl/7.88.1",
                        "accept": "*/*",
                        "content-type": "application/json",
                        "content-length": "80",
                    },
                    "body": {
                        "model": "azure-embedding-model",
                        "input": ["hi who is ishaan"],
                    },
                },
                "user": None,
                "metadata": {
                    "user_api_key": None,
                    "headers": {
                        "host": "0.0.0.0:8000",
                        "user-agent": "curl/7.88.1",
                        "accept": "*/*",
                        "content-type": "application/json",
                        "content-length": "80",
                    },
                    "model_group": "EMBEDDING_MODEL_GROUP",
                    "deployment": "azure/azure-embedding-model-ModelID-azure/azure-embedding-modelhttps://openai-gpt-4-test-v-1.openai.azure.com/2023-07-01-preview",
                },
                "model_info": {
                    "mode": "embedding",
                    "base_model": "text-embedding-ada-002",
                    "id": "20b2b515-f151-4dd5-a74f-2231e2f54e29",
                },
                "litellm_call_id": "2642e009-b3cd-443d-b5dd-bb7d56123b0e",
                "litellm_logging_obj": "<litellm.utils.Logging object at 0x12f1bddb0>",
            }
        )

        print(embedding_cache_key_2)
        embedding_cache_key_str_2 = (
            "model: EMBEDDING_MODEL_GROUPinput: ['hi who is ishaan']"
        )
        hash_object = hashlib.sha256(embedding_cache_key_str_2.encode())
        # Hexadecimal representation of the hash
        hash_hex = hash_object.hexdigest()
        assert embedding_cache_key_2 == hash_hex
        print("passed!")
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")


# test_get_cache_key()
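# test_cache_context_managers: litellm.disable_cache() should clear litellm.cache and
# remove the "cache" success callbacks; litellm.enable_cache() should set them back up.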
def test_cache_context_managers():
    litellm.set_verbose = True
    litellm.cache = Cache(type="redis")

    # cache is on, disable it
    litellm.disable_cache()
    assert litellm.cache is None
    assert "cache" not in litellm.success_callback
    assert "cache" not in litellm._async_success_callback

    # disable a cache that is already off
    litellm.disable_cache()
    assert litellm.cache is None
    assert "cache" not in litellm.success_callback
    assert "cache" not in litellm._async_success_callback

    litellm.enable_cache(
        type="redis",
        host=os.environ["REDIS_HOST"],
        port=os.environ["REDIS_PORT"],
    )

    assert litellm.cache is not None
    assert litellm.cache.type == "redis"

    print("VARS of litellm.cache", vars(litellm.cache))


# test_cache_context_managers()
# test_custom_redis_cache_params()
# def test_redis_cache_with_ttl():
#     cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
#     sample_model_response_object_str = """{
#         "choices": [
#             {
#                 "finish_reason": "stop",
#                 "index": 0,
#                 "message": {
#                     "role": "assistant",
#                     "content": "I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
#                 }
#             }
#         ],
#         "created": 1691429984.3852863,
#         "model": "claude-instant-1",
#         "usage": {
#             "prompt_tokens": 18,
#             "completion_tokens": 23,
#             "total_tokens": 41
#         }
#     }"""
#     sample_model_response_object = {
#         "choices": [
#             {
#                 "finish_reason": "stop",
#                 "index": 0,
#                 "message": {
#                     "role": "assistant",
#                     "content": "I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
#                 }
#             }
#         ],
#         "created": 1691429984.3852863,
#         "model": "claude-instant-1",
#         "usage": {
#             "prompt_tokens": 18,
#             "completion_tokens": 23,
#             "total_tokens": 41
#         }
#     }
#     cache.add_cache(cache_key="test_key", result=sample_model_response_object_str, ttl=1)
#     cached_value = cache.get_cache(cache_key="test_key")
#     print(f"cached-value: {cached_value}")
#     assert cached_value['choices'][0]['message']['content'] == sample_model_response_object['choices'][0]['message']['content']
#     time.sleep(2)
#     assert cache.get_cache(cache_key="test_key") is None
# # test_redis_cache_with_ttl()


# def test_in_memory_cache_with_ttl():
#     cache = Cache(type="local")
#     sample_model_response_object_str = """{
#         "choices": [
#             {
#                 "finish_reason": "stop",
#                 "index": 0,
#                 "message": {
#                     "role": "assistant",
#                     "content": "I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
#                 }
#             }
#         ],
#         "created": 1691429984.3852863,
#         "model": "claude-instant-1",
#         "usage": {
#             "prompt_tokens": 18,
#             "completion_tokens": 23,
#             "total_tokens": 41
#         }
#     }"""
#     sample_model_response_object = {
#         "choices": [
#             {
#                 "finish_reason": "stop",
#                 "index": 0,
#                 "message": {
#                     "role": "assistant",
#                     "content": "I'm doing well, thank you for asking. I am Claude, an AI assistant created by Anthropic."
#                 }
#             }
#         ],
#         "created": 1691429984.3852863,
#         "model": "claude-instant-1",
#         "usage": {
#             "prompt_tokens": 18,
#             "completion_tokens": 23,
#             "total_tokens": 41
#         }
#     }
#     cache.add_cache(cache_key="test_key", result=sample_model_response_object_str, ttl=1)
#     cached_value = cache.get_cache(cache_key="test_key")
#     assert cached_value['choices'][0]['message']['content'] == sample_model_response_object['choices'][0]['message']['content']
#     time.sleep(2)
#     assert cache.get_cache(cache_key="test_key") is None
# # test_in_memory_cache_with_ttl()