Update app.py
app.py CHANGED
@@ -8,20 +8,20 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 
 # Available LLM models
 LLM_MODELS = {
-    "
-    "
-    "
-    "
-    "
-    "
-    "
+    "Mistral": "mistralai/Mistral-7B-Instruct-v0.2",
+    "Zephyr": "HuggingFaceH4/zephyr-7b-beta",
+    "OpenChat": "openchat/openchat-3.5",
+    "Llama2": "meta-llama/Llama-2-7b-chat-hf",
+    "Phi": "microsoft/phi-2",
+    "Neural": "nvidia/neural-chat-7b-v3-1",
+    "Starling": "HuggingFaceH4/starling-lm-7b-alpha"
 }
 
 # Default selected models
 DEFAULT_MODELS = [
-    "
-    "
-    "
+    "mistralai/Mistral-7B-Instruct-v0.2",
+    "HuggingFaceH4/zephyr-7b-beta",
+    "openchat/openchat-3.5"
 ]
 
 # Initialize clients with token
@@ -46,7 +46,10 @@ def respond_single(
     temperature: float,
     top_p: float,
 ):
-
+    system_prefix = """You must answer in Korean. Your role is to provide detailed explanations and Q&A based on the given content.
+    Explain in a very friendly and detailed manner."""
+
+    messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}]
 
     for user, assistant in history:
         if user:
@@ -58,15 +61,18 @@ def respond_single(
 
     response = ""
     try:
-        for msg in client.
-
-
+        for msg in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
             stream=True,
             temperature=temperature,
             top_p=top_p,
         ):
-
-
+            if hasattr(msg.choices[0].delta, 'content'):
+                token = msg.choices[0].delta.content
+                if token is not None:
+                    response += token
+                    yield response
     except Exception as e:
         yield f"Error: {str(e)}"
 
@@ -106,18 +112,11 @@ def respond_all(
         generate(clients[selected_models[2]], history3),
     )
 
-
-
 css = """
-footer {
-    visibility: hidden;
-}
+footer {visibility: hidden}
 """
 
-
-
-with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
-
+with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
     with gr.Row():
         model_choices = gr.Checkboxgroup(
             choices=list(LLM_MODELS.values()),
@@ -149,27 +148,27 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
     with gr.Row():
         with gr.Column():
            system_message = gr.Textbox(
-                value="
+                value="You are a friendly AI assistant.",
                 label="System message"
             )
             max_tokens = gr.Slider(
                 minimum=1,
-                maximum=
-                value=
+                maximum=8000,
+                value=4000,
                 step=1,
                 label="Max new tokens"
             )
             temperature = gr.Slider(
-                minimum=0
-                maximum=
+                minimum=0,
+                maximum=1,
                 value=0.7,
                 step=0.1,
                 label="Temperature"
             )
             top_p = gr.Slider(
-                minimum=0
-                maximum=1
-                value=0.
+                minimum=0,
+                maximum=1,
+                value=0.9,
                 step=0.05,
                 label="Top-p"
             )
@@ -181,6 +180,20 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
             placeholder="Enter text and press enter",
             container=False
         )
+
+    examples = [
+        ["Explain the detailed usage step by step, as if describing the screen, in at least 4000 tokens"],
+        ["Write 20 FAQ entries in detail. Use at least 4000 tokens."],
+        ["Write a YouTube video script of at least 4000 tokens, focusing on usage, differentiators, features, and strengths"],
+        ["Write an SEO-optimized blog post about this service, at least 4000 tokens"],
+        ["Continue the previous answer"],
+    ]
+
+    gr.Examples(
+        examples=examples,
+        inputs=msg_input,
+        cache_examples=False
+    )
 
     def submit_message(message, file):
         return respond_all(
@@ -206,5 +219,4 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
 if __name__ == "__main__":
     if not HF_TOKEN:
         print("Warning: HF_TOKEN environment variable is not set")
-    demo.launch()
-
+    demo.launch()
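A note on the new streaming loop in respond_single: it consumes huggingface_hub's InferenceClient.chat_completion with stream=True, where each streamed chunk carries a delta whose content field may be None (for example on the final chunk), which is why the loop guards before accumulating. Below is a minimal standalone sketch of that pattern; the model name, prompt, and parameter values are illustrative placeholders, not part of this commit.

# Minimal sketch (assumptions: huggingface_hub installed, HF_TOKEN set;
# model and prompt are placeholders for illustration only).
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    "HuggingFaceH4/zephyr-7b-beta",  # placeholder model
    token=os.getenv("HF_TOKEN"),
)

messages = [
    {"role": "system", "content": "You are a friendly AI assistant."},
    {"role": "user", "content": "Say hello."},
]

response = ""
for chunk in client.chat_completion(
    messages,
    max_tokens=64,
    stream=True,
    temperature=0.7,
    top_p=0.9,
):
    # Each chunk holds a delta; content can be None, hence the guard.
    token = chunk.choices[0].delta.content
    if token is not None:
        response += token
print(response)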