Spaces:

kevinhug
/

ai

Sleeping

App Files Files Community

kevinhug committed on Apr 14

Commit

8e73b66

1 Parent(s): f4f1a71

judge

Browse files

Files changed (3) hide show

app.py +15 -10
classify.py +37 -7
judge.py +89 -0

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ from tool import rival_product
 from graphrag import reasoning
 from knowledge import graph
 from pii import derisk
-from classify import bucket
 # Define the Google Analytics script
 head = """
@@ -24,7 +24,7 @@ with gr.Blocks(head=head) as demo:
     gr.Markdown("""
 If you're experiencing declining market share, inefficiencies in your operations, here's how I can help:
 ==============
-Marketing/Client Experience
 ------------
 - GraphRAG: Models customer-product relationship networks for next-best-action predictions
 - DSPy: Optimizes cross-sell/upsell prompt variations through A/B testing
@@ -179,8 +179,10 @@ Uses customer data and behavior to craft messages that resonate with specific se
   with gr.Tab("Knowledge Graph"):
     gr.Markdown("""
-    Objective: Explain concept in knowledge graph structured output
-    ================================================
     """)
     in_verbatim = gr.Textbox(label="Question")
     out_product = gr.JSON(label="Knowledge Graph")
@@ -251,20 +253,23 @@ Allows downstream tasks (like sentiment analysis or topic modeling) to focus on
     """)
-  with gr.Tab("multi class classification"):
     gr.Markdown("""
     Objective: Classify customer feedback into product bucket
     ================================================
     """)
     in_verbatim = gr.Textbox(label="Customer Feedback separate by ;")
-    out_product = gr.Textbox(label="Classification")
     gr.Examples(
       [
         [
         """
-"The online portal makes managing my mortgage payments so convenient."
-;"RBC offer great mortgage for my home with competitive rate thank you";
 "Low interest rate compared to other cards I’ve used. Highly recommend for responsible spenders.";
 "The mobile check deposit feature saves me so much time. Banking made easy!";
 "Affordable premiums with great coverage. Switched from my old provider and saved!"
@@ -273,8 +278,8 @@ Allows downstream tasks (like sentiment analysis or topic modeling) to focus on
       ],
       [in_verbatim]
     )
-    btn_recommend = gr.Button("Classify")
-    btn_recommend.click(fn=bucket, inputs=in_verbatim, outputs=out_product)
     gr.Markdown("""
 Benefits of Multi Class Classification
 ==================

 from graphrag import reasoning
 from knowledge import graph
 from pii import derisk
+from classify import judge
 # Define the Google Analytics script
 head = """
     gr.Markdown("""
 If you're experiencing declining market share, inefficiencies in your operations, here's how I can help:
 ==============
+Marketing & Client Experience
 ------------
 - GraphRAG: Models customer-product relationship networks for next-best-action predictions
 - DSPy: Optimizes cross-sell/upsell prompt variations through A/B testing
   with gr.Tab("Knowledge Graph"):
     gr.Markdown("""
+Objective: Explain concept in knowledge graph structured output
+=====================================
+- We create query plan by breaking down into subquery
+- Using those subquery to create knowledge graph
     """)
     in_verbatim = gr.Textbox(label="Question")
     out_product = gr.JSON(label="Knowledge Graph")
     """)
+  with gr.Tab("classification"):
     gr.Markdown("""
     Objective: Classify customer feedback into product bucket
     ================================================
+    - multi class classification, could have multiple label for 1 feedback
+    - fix classification in this use case: online banking, card, auto finance, mortgage, insurance
+    - LLM Judge to evaluate relevancy
     """)
     in_verbatim = gr.Textbox(label="Customer Feedback separate by ;")
+    out_product = gr.Textbox(label="Classification & Evaluation")
     gr.Examples(
       [
         [
         """
+"The online portal makes managing my mortgage payments so convenient.";
+"RBC offer great mortgage for my home with competitive rate thank you";
 "Low interest rate compared to other cards I’ve used. Highly recommend for responsible spenders.";
 "The mobile check deposit feature saves me so much time. Banking made easy!";
 "Affordable premiums with great coverage. Switched from my old provider and saved!"
       ],
       [in_verbatim]
     )
+    btn_recommend = gr.Button("Classify & Evaluation")
+    btn_recommend.click(fn=judge, inputs=in_verbatim, outputs=out_product)
     gr.Markdown("""
 Benefits of Multi Class Classification
 ==================

classify.py CHANGED Viewed

@@ -22,15 +22,13 @@ client = instructor.from_openai(
     ),
     mode=instructor.Mode.JSON,
 )
-    chain_of_thought:List[str]= Field(default_factory=list, description="the chain of thought led to the prediction", examples=["Let's think step by step. the customer explicitly mention donation, and there is a tag name with donation, tag the text with donation"])
 """
-llm = 'llama-3.1-8b-instant' if os.getenv("GROQ_API_KEY") else "deepseek-r1"
 class Tag(BaseModel):
     name: str
     id: int= Field(..., description="id for the specific tag")
     confidence: float = Field(
@@ -73,10 +71,27 @@ class TagRequest(BaseModel):
     texts: List[str]
     tags: List[TagWithInstructions]
 class TagResponse(BaseModel):
     texts: List[str]
-    predictions: List[Optional[Iterable[Tag]]]=Field(...,default_factory=list)
 sem = asyncio.Semaphore(2)
@@ -158,6 +173,19 @@ texts = """
 "Affordable premiums with great coverage. Switched from my old provider and saved!"
 """
 def bucket(texts):
   texts=map(lambda t: t.strip(), texts.split(";"))
   request = TagRequest(texts=texts, tags=tags)
@@ -166,4 +194,6 @@ def bucket(texts):
   return response.model_dump_json(indent=2)
 if __name__=="__main__":
-  print(bucket(texts))

     ),
     mode=instructor.Mode.JSON,
 )
 """
+llm = 'llama-3.1-8b-instant' if os.getenv("GROQ_API_KEY") else "qwen2.5" #"gemma3:12b" #"llama3.2" #"deepseek-r1"
 class Tag(BaseModel):
+    chain_of_thought:List[str]= Field(default_factory=list, description="the chain of thought led to the prediction", examples=["Let's think step by step. the customer explicitly mention donation, and there is a tag name with donation, tag the text with donation"])
     name: str
     id: int= Field(..., description="id for the specific tag")
     confidence: float = Field(
     texts: List[str]
     tags: List[TagWithInstructions]
+from judge import judge_relevance, Judgment
 class TagResponse(BaseModel):
     texts: List[str]
+    predictions: List[Optional[List[Tag]]]=Field(...,default_factory=list)
+    judgment: List[Optional[List[Judgment]]]=Field(...,default_factory=list)
+    async def judge(self):
+      for i in range(len(self.texts)):
+        p=self.predictions[i]
+        if p:
+          self.judgment.append(
+            await asyncio.gather(*[
+              judge_relevance(
+                " ".join(t.chain_of_thought),
+                texts[i],
+                t.name
+              ) for t in p
+            ])
+          )
+        else:
+          self.judgment.append(None)
 sem = asyncio.Semaphore(2)
 "Affordable premiums with great coverage. Switched from my old provider and saved!"
 """
+def judge_response(response):
+  response.judge()
+def judge(texts):
+  texts=map(lambda t: t.strip(), texts.split(";"))
+  request = TagRequest(texts=texts, tags=tags)
+  response = asyncio.run(tag_request(request))
+  #print(response.model_dump_json(indent=2))
+  asyncio.run(response.judge())
+  #[print(r.model_dump_json(indent=2)) for r in response]
+  return response.model_dump_json(indent=2)
 def bucket(texts):
   texts=map(lambda t: t.strip(), texts.split(";"))
   request = TagRequest(texts=texts, tags=tags)
   return response.model_dump_json(indent=2)
 if __name__=="__main__":
+  # Manual smoke test: run the classify-and-judge pipeline on the module-level
+  # sample `texts` and print the resulting JSON.
+  # NOTE(review): pprint is imported but not used in the lines visible here.
+  from pprint import pprint
+  #print(bucket(texts))
+  print(judge(texts))

judge.py ADDED Viewed

	@@ -0,0 +1,89 @@

+from typing import List, Iterable, Optional
+from pydantic import BaseModel, ValidationInfo, model_validator, Field,  field_validator
+import instructor
+import openai
+import asyncio
+import os
+from groq import AsyncGroq
+# Initialize with API key
+client = AsyncGroq(api_key=os.getenv("GROQ_API_KEY"))
+# Enable instructor patches for Groq client
+client = instructor.from_groq(client)
+"""
+client = instructor.from_openai(
+    openai.AsyncOpenAI(
+        base_url="http://localhost:11434/v1",
+        api_key="ollama",
+    ),
+    mode=instructor.Mode.JSON,
+)
+"""
+llm = 'llama-3.1-8b-instant' if os.getenv("GROQ_API_KEY") else "qwen2.5" #"gemma3:12b" #"llama3.2" #"deepseek-r1"
+class Judgment(BaseModel):
+    thought: str = Field(...,
+        description="The step-by-step reasoning process used to analyze the reasoning and the answer", examples=["Let's think step by step...context explicit stated donation, therefore answer should be donation"]
+    )
+    justification: str = Field(...,
+        description="Explanation for the logical judgment, detailing key factors that led to the conclusion", examples=["sound reasoning if context stated donation, it is valid and logical for answer to be donation"]
+    )
+    logical: bool = Field(...,
+        description="Boolean judgment indicating whether the reasoning and the answer are logical or relevant (True) or not (False)", examples=[True, False]
+    )
+#MaybeJudgment=instructor.Maybe(Judgment)
+async def judge_relevance(reasoning: str, context:str, answer: str) -> Judgment:
+    return await client.chat.create(
+        model=llm, #"gpt-4",
+        temperature=0.3,
+        max_retries=3,
+        messages=[
+            {
+                "role": "system",
+                "content": """
+                    You are tasked with comparing a (reasoning + context) and a answer to determine if they are relevant to each other or logical in some way. Your goal is to analyze the content, context, and potential connections between the two.
+                    To determine if the (reasoning + context) and answer are relevant or logical, please follow these steps:
+                    1. Carefully read and understand both the reasoning and the answer.
+                    2. Identify the main topic, keywords, and concepts in the (reasoning + context).
+                    3. Analyze the answer for any mention of these topics, keywords, or concepts in <thought> tag
+                    4. Consider any potential indirect connections or implications that might link the reasoning and the answer. Deductive reasoning need to be valid and sound in <justification> tag
+                    5. Evaluate the overall logic and soundness in both the reasoning + context that lead to the answer in <logical> tag
+                    As you go through this process, please use a chain of thought approach. Write out your reasoning for each step inside <thought> tags.
+                    After your analysis, provide a boolean judgment on whether the reasoning and the answer are logical or relevant to each other. Use "true" if they are logical or relevant, and "false" if they are not.
+                    Before giving your final judgment, provide a justification for your decision. Explain the key factors that led to your conclusion.
+                    Please ensure your analysis is thorough, impartial, and based on the content provided.
+                """,
+            },
+            {
+                "role": "user",
+                "content": """
+                    Here is the question + context:
+                    <reasoning>
+                    {{reasoning}}
+                    </reasoning>
+                    <context>
+                    {{context}}
+                    </context>
+                    Here is the text:
+                    <answer>
+                    {{answer}}
+                    </answer>
+                """,
+            },
+        ],
+        response_model=Judgment, #(reasoning=reasoning, context=context, answer=answer)
+        context={"reasoning": reasoning, "context": context, "answer": answer},
+    )