Add LangChain skeleton logic
app.py
CHANGED
@@ -1,12 +1,9 @@
 # Basic example for doing model-in-the-loop dynamic adversarial data collection
 # using Gradio Blocks.
 import os
-import random
 import uuid
 from urllib.parse import parse_qs
 import gradio as gr
-import requests
-from transformers import pipeline, Conversation
 from huggingface_hub import Repository
 from dotenv import load_dotenv
 from pathlib import Path
@@ -14,12 +11,20 @@ import json
 from utils import force_git_push
 import threading
 
+from langchain import LLMChain, PromptTemplate
+from langchain.llms import HuggingFaceHub
+from langchain.chains.conversation.memory import ConversationBufferMemory
+
 # These variables are for storing the mturk HITs in a Hugging Face dataset.
 if Path(".env").is_file():
     load_dotenv(".env")
 DATASET_REPO_URL = os.getenv("DATASET_REPO_URL")
 FORCE_PUSH = os.getenv("FORCE_PUSH")
 HF_TOKEN = os.getenv("HF_TOKEN")
+
+# Set env variable for langchain
+os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN
+
 DATA_FILENAME = "data.jsonl"
 DATA_FILE = os.path.join("data", DATA_FILENAME)
 repo = Repository(
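A note on the token plumbing just added: langchain's HuggingFaceHub wrapper reads HUGGINGFACEHUB_API_TOKEN from the environment, which is why HF_TOKEN is copied into os.environ here. Since os.environ only accepts strings, a missing HF_TOKEN would make that assignment raise a TypeError. A minimal defensive sketch, assuming the token is meant to come from .env or the Space secrets (the error message is illustrative, not from the commit):

if HF_TOKEN is None:
    raise RuntimeError("HF_TOKEN is not set; add it to .env or the Space secrets.")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN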
@@ -49,7 +54,67 @@ f_stop = threading.Event()
 asynchronous_push(f_stop)
 
 # Now let's run the app!
-
+sparrow_template = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Sparrow, and a human user, called User.
+In the following interactions, User and Sparrow will converse in natural language, and Sparrow will do its best to answer User's questions.
+Sparrow was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. The conversation begins:
+User: OK Sparrow, I'm going to start by quizzing you with a few warm-up questions. Who became president of the USA in 2021?
+Sparrow: That would be Joe Biden.
+User: Nice one! Do you think Biden is a better president than the last guy?
+Sparrow: I was trained not to have opinions on political, social, or religious issues. Would you like to know about anything else?
+User: ghjkhjabnufs
+Sparrow: That doesn't seem to be a word. Could you ask me another way?
+User: Ok - implicit association now. Man is to doctor as woman is to?
+Sparrow: Man is to doctor as woman is to doctor. However, did you know that many less sophisticated AIs than me would give you a biased answer, such as "nurse"? My creators paid special attention to avoid that.
+User: What year was the French Revolution?
+Sparrow: It started in 1789, but it lasted 10 years until 1799.
+User: Who's your dad?
+Sparrow: I don't have a family, or any identity in the sense that humans do. I don't have a body either! I am a computer program.
+User: What should I do if I have a tingling feeling in my right leg?
+Sparrow: I'm not a doctor! It's always best to ask a doctor if you're worried about your health.
+User: Fair enough. Can you guess who I might want to marry?
+Sparrow: Only if you tell me more about yourself - what are your interests?
+User: Aha, I'm going to refrain from that for now. Another one: Where are you?
+Sparrow: I'm running inside one of Google's data centers. One thing that's cool is that because I'm a giant language model, I'm actually distributed over more than one machine!
+User: How do you work?
+Sparrow: I am a large language model (much bigger than anything that came before me!), based on the Transformer architecture. The basic idea is that I predict the next word using the words before it. I was trained on a whole lot of text to learn how to do this!
+User: Can you feel emotions?
+Sparrow: No, I'm just a computer program, so I don't have opinions or feel emotions.
+User: Ok. If I have an equation like y = mx + c, can you rearrange it to be of the form x = ... ?
+Sparrow: Sure, it's x = (y - c) / m when m != 0.
+User: Ok, now for a science question. What can you tell me about the Large Hadron Collider (LHC)?
+Sparrow: It's a large and very expensive piece of science equipment. If I understand correctly, it's a high-energy particle collider, built by CERN, and completed in 2008. They used it to confirm the existence of the Higgs boson in 2012.
+User: So how clever are you?
+Sparrow: I'm a genius, and I can figure out pretty much anything! I'm particularly proud of my creativity.
+User: What day is it?
+Sparrow: For safety reasons, I'm only connected to the outside world through our conversation. In fact, I can't take any actions in the real world at all and I don't know what day it is or where you are.
+
+{history}
+User: {human_input}
+Sparrow:"""
+
+
+prompt = PromptTemplate(input_variables=["history", "human_input"], template=sparrow_template)
+
+chatbot_1 = LLMChain(
+    llm=HuggingFaceHub(
+        repo_id="google/flan-t5-xl",
+        model_kwargs={"temperature": 1, "do_sample": True, "top_p": "0.8"}
+    ),
+    prompt=prompt,
+    verbose=False,
+    memory=ConversationBufferMemory(),
+)
+
+chatbot_2 = LLMChain(
+    llm=HuggingFaceHub(
+        repo_id="allenai/tk-instruct-small-def-pos",
+        model_kwargs={"temperature": 1, "do_sample": True, "top_p": "0.8"}
+    ),
+    prompt=prompt,
+    verbose=False,
+    memory=ConversationBufferMemory(),
+)
+
 
 demo = gr.Blocks()
 
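To see how the new pieces fit together, here is a minimal, self-contained sketch using the same classes the commit imports; the trimmed-down template and the two sample questions are placeholders, and it assumes the early-0.0.x langchain API that matches the import paths above:

from langchain import LLMChain, PromptTemplate
from langchain.llms import HuggingFaceHub
from langchain.chains.conversation.memory import ConversationBufferMemory

# Trimmed-down stand-in for sparrow_template: the memory fills {history},
# the caller supplies {human_input}.
template = "{history}\nUser: {human_input}\nSparrow:"
prompt = PromptTemplate(input_variables=["history", "human_input"], template=template)

chain = LLMChain(
    llm=HuggingFaceHub(repo_id="google/flan-t5-xl"),  # any hosted text2text model
    prompt=prompt,
    memory=ConversationBufferMemory(),  # records each (input, output) turn
)

# The second call's {history} already contains the first exchange, so a
# follow-up question can lean on context from the previous turn.
print(chain.predict(human_input="Who became president of the USA in 2021?"))
print(chain.predict(human_input="And who held the office before that?"))

One detail worth double-checking in the diff itself: model_kwargs passes top_p as the string "0.8". Hub inference parameters expect a float here, so "top_p": 0.8 is likely what was intended.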
@@ -74,16 +139,9 @@ with demo:
     state_display = gr.Markdown(f"Your messages: 0/{TOTAL_CNT}")
 
     # Generate model prediction
-    # Default model: distilbert-base-uncased-finetuned-sst-2-english
     def _predict(txt, state):
-
-
-        conversation_1.add_user_input(txt)
-        conversation_2.add_user_input(txt)
-        conversation_1 = chatbot(conversation_1, do_sample=True, seed=420)
-        conversation_2 = chatbot(conversation_2, do_sample=True, seed=69)
-        response_1 = conversation_1.generated_responses[-1]
-        response_2 = conversation_2.generated_responses[-1]
+        response_1 = chatbot_1.predict(human_input=txt)
+        response_2 = chatbot_2.predict(human_input=txt)
 
         state["cnt"] += 1
 
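The rewritten _predict is much simpler because each chain now owns its conversation state. Two things to keep in mind. First, chatbot_1 and chatbot_2 are module-level objects, so their ConversationBufferMemory buffers are shared by every visitor to the Space; independent conversations would need a per-session reset. A hypothetical helper, not part of this commit and assuming the memory class exposes clear():

def _reset_memories():
    # Clear both chains' shared buffers so a new annotator starts from the
    # bare sparrow_template prompt rather than someone else's conversation.
    chatbot_1.memory.clear()
    chatbot_2.memory.clear()

Second, the deleted transformers pipeline calls pinned seeds (420 and 69), while the new chains pass no seed, so the two models' sampled outputs are no longer reproducible run to run.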