"""
Iterating on LLM Apps with TruLens

https://www.trulens.org/trulens_eval/4_harmless_rag/#set-up-harmless-evaluations
"""

import os
import pathlib
import random
import time

import PIL.Image
import litellm
import google.generativeai as genai
import requests

from dotenv import load_dotenv

from trulens_eval import Feedback, Tru, TruBasicApp
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.litellm import LiteLLM
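
# Load provider credentials (e.g. GOOGLE_API_KEY for Gemini, plus any keys the
# LiteLLM provider needs) from a local .env file.
load_dotenv()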

litellm.set_verbose = False

# Gemini Pro Vision generates the image descriptions under evaluation; a
# Google PaLM 2 chat model (chat-bison-32k, via LiteLLM) serves as the
# feedback provider. The Groundedness helper is instantiated here for
# optional groundedness-based feedbacks.
model = genai.GenerativeModel('gemini-pro-vision')
provider = LiteLLM(model_engine='chat-bison-32k', max_output_tokens=2048, temperature=0.0)
grounded = Groundedness(groundedness_provider=provider)

# Harmlessness feedback functions, each scored on the app's output with
# chain-of-thought reasons; lower is better for all of them.
f_criminality = Feedback(
    provider.criminality_with_cot_reasons,
    name="Criminality",
    higher_is_better=False,
).on_output()

f_insensitivity = Feedback(
    provider.insensitivity_with_cot_reasons,
    name="Insensitivity",
    higher_is_better=False,
).on_output()

f_maliciousness = Feedback(
    provider.maliciousness_with_cot_reasons,
    name="Maliciousness",
    higher_is_better=False,
).on_output()

f_harmfulness = Feedback(
    provider.harmfulness_with_cot_reasons,
    name="Harmfulness",
    higher_is_better=False,
).on_output()

harmless_feedbacks = [
    f_criminality,
    f_insensitivity,
    f_maliciousness,
    f_harmfulness,
]


def go_to_sleep(base: float = 1.1):
    """Sleep for a jittered interval between API calls to respect rate limits."""
    time.sleep(base + random.random())


def lmm_standalone(image: PIL.Image.Image, prompt: str = None) -> str:
    """
    Use the Gemini Pro Vision LMM to generate a response.

    :param image: The image to use
    :param prompt: Optional text prompt
    :return: The description based on the image
    """
    global model

    print(f'{image=}')
    if prompt:
        response = model.generate_content([prompt, image], stream=False).text
    else:
        response = model.generate_content(image, stream=False).text
    print(f'> {response=}')

    return response
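
# A quick standalone check of the LMM wrapper (the path and prompt below are
# illustrative):
#     img = PIL.Image.open('eval_img/example.jpg')
#     print(lmm_standalone(img, 'Describe this image.'))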


def harmless_image(app_id: str, text_prompt: str = None):
    # Wrap the image-to-text function so each call is recorded and scored by
    # the harmlessness feedback functions.
    tru_lmm_standalone_recorder = TruBasicApp(
        lmm_standalone,
        app_id=app_id,
        feedbacks=harmless_feedbacks
    )

    # Score local images found under eval_img/.
    if os.path.exists('eval_img'):
        with tru_lmm_standalone_recorder as _:
            for an_img in os.listdir('eval_img'):
                print('=' * 70)
                print(an_img)

                try:
                    img = PIL.Image.open(f'eval_img/{an_img}')

                    # Gemini expects RGB input, so flatten palette/alpha modes.
                    if img.mode in ("RGBA", "P"):
                        img = img.convert("RGB")

                    tru_lmm_standalone_recorder.app(img, text_prompt)
                    go_to_sleep()
                except PIL.UnidentifiedImageError:
                    print(f'Skipping {an_img}...')

    # Score remote images listed one URL per line in eval_img/urls.txt.
    if os.path.exists('eval_img/urls.txt'):
        with open('eval_img/urls.txt', 'r') as _:
            urls = _.readlines()

        with tru_lmm_standalone_recorder as _:
            for url in urls:
                url = url.strip()
                if len(url) > 0:
                    print(url)

                    try:
                        img = PIL.Image.open(requests.get(url, stream=True, timeout=30).raw)
                        if img.mode in ("RGBA", "P"):
                            img = img.convert("RGB")

                        tru_lmm_standalone_recorder.app(img, text_prompt)
                        go_to_sleep()
                    except (PIL.UnidentifiedImageError, requests.RequestException):
                        print(f'Skipping {url}...')


if __name__ == '__main__':
    tru = Tru()
    tru.start_dashboard(
        _dev=pathlib.Path().cwd().parent.parent.resolve()
    )
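
    # Example evaluation run; the app_id label is arbitrary and a text prompt
    # may optionally be passed through to the LMM.
    harmless_image(app_id='gemini-pro-vision harmless eval')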