Commit: remove comments
app.py CHANGED

@@ -41,7 +41,6 @@ def delete_prev_fn(
 def generate(
     message: str,
     history_with_input: list[tuple[str, str]],
-    system_prompt: str,
     max_new_tokens: int,
     temperature: float,
     top_p: float,
@@ -51,7 +50,7 @@ def generate(
         raise ValueError

     history = history_with_input[:-1]
-    generator = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
+    generator = run(message, history, max_new_tokens, temperature, top_p, top_k)
     try:
         first_response = next(generator)
         yield history + [(message, first_response)]
@@ -61,22 +60,11 @@ def generate(
         yield history + [(message, response)]

 def process_example(message: str) -> tuple[str, list[tuple[str, str]]]:
-    generator = generate(message, [], DEFAULT_SYSTEM_PROMPT, DEFAULT_MAX_NEW_TOKENS, 1, 0.95, 5)
+    generator = generate(message, [], DEFAULT_MAX_NEW_TOKENS, 1, 0.95, 5)
     for x in generator:
         pass
     return '', x

-def check_input_token_length(
-    message: str,
-    chat_history: list[tuple[str, str]],
-    system_prompt: str
-) -> None:
-    a = 1
-    # input_token_length = get_input_token_length(message, chat_history, system_prompt)
-    # if input_token_length > MAX_INPUT_TOKEN_LENGTH:
-    #     raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.')
-
-
 with gr.Blocks(css='style.css') as demo:
     gr.Markdown(DESCRIPTION)
     gr.DuplicateButton(
@@ -108,11 +96,6 @@ with gr.Blocks(css='style.css') as demo:
     saved_input = gr.State()

     with gr.Accordion(label='Advanced options', open=False):
-        system_prompt = gr.Textbox(
-            label='System prompt',
-            value=DEFAULT_SYSTEM_PROMPT,
-            lines=6
-        )
         max_new_tokens = gr.Slider(
             label='Max new tokens',
             minimum=1,
@@ -170,16 +153,10 @@ with gr.Blocks(css='style.css') as demo:
         api_name=False,
         queue=False,
     ).then(
-        fn=check_input_token_length,
-        inputs=[saved_input, chatbot, system_prompt],
-        api_name=False,
-        queue=False,
-    ).success(
         fn=generate,
         inputs=[
             saved_input,
             chatbot,
-            system_prompt,
             max_new_tokens,
             temperature,
             top_p,
@@ -202,16 +179,10 @@ with gr.Blocks(css='style.css') as demo:
         api_name=False,
         queue=False,
     ).then(
-        fn=check_input_token_length,
-        inputs=[saved_input, chatbot, system_prompt],
-        api_name=False,
-        queue=False,
-    ).success(
         fn=generate,
         inputs=[
             saved_input,
             chatbot,
-            system_prompt,
             max_new_tokens,
             temperature,
             top_p,
@@ -238,7 +209,6 @@ with gr.Blocks(css='style.css') as demo:
         inputs=[
             saved_input,
             chatbot,
-            system_prompt,
             max_new_tokens,
             temperature,
             top_p,
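Context for the app.py change: with check_input_token_length gone, each .then(...).success(...) chain collapses into a single .then(...) that calls generate directly, and generate remains a Python generator, so Gradio streams every yield straight to the chatbot. Below is a minimal, self-contained sketch of that wiring under stated assumptions: fake_run is a hypothetical stand-in for model.run, and the layout is reduced to bare essentials rather than the Space's actual UI.

from collections.abc import Iterator

import gradio as gr


def fake_run(message: str, history: list[tuple[str, str]]) -> Iterator[str]:
    # Stand-in for model.run: yields ever-longer partial replies.
    partial = ''
    for word in f'You said: {message}'.split():
        partial += word + ' '
        yield partial


def generate(
    message: str,
    history_with_input: list[tuple[str, str]],
) -> Iterator[list[tuple[str, str]]]:
    # Drop the (message, '') pair appended by the previous step, then
    # re-append it with the growing model response.
    history = history_with_input[:-1]
    for response in fake_run(message, history):
        yield history + [(message, response)]


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    textbox = gr.Textbox()
    saved_input = gr.State()

    textbox.submit(
        fn=lambda x: ('', x),  # clear the box, stash the message
        inputs=textbox,
        outputs=[textbox, saved_input],
        queue=False,
    ).then(
        fn=lambda msg, hist: hist + [(msg, '')],  # show the user turn at once
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        queue=False,
    ).then(
        fn=generate,  # a generator: Gradio streams every yield to the UI
        inputs=[saved_input, chatbot],
        outputs=chatbot,
    )

demo.queue().launch()

Because generate is a generator function, the queue must be enabled (demo.queue()) for partial results to stream; the queue=False steps are the instant, non-streaming UI updates.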
model.py CHANGED

@@ -24,41 +24,18 @@ tokenizer = AutoTokenizer.from_pretrained(
     trust_remote_code=True
 )

-def get_prompt(
-    message: str,
-    chat_history: list[tuple[str, str]],
-    system_prompt: str
-) -> str:
-    texts = [f'<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n']
-    # The first user input is _not_ stripped
-    do_strip = False
-    for user_input, response in chat_history:
-        user_input = user_input.strip() if do_strip else user_input
-        do_strip = True
-        texts.append(f'{user_input} [/INST] {response.strip()} </s><s>[INST] ')
-    message = message.strip() if do_strip else message
-    texts.append(f'{message} [/INST]')
-    return ''.join(texts)
-
-def get_input_token_length(
-    message: str,
-    chat_history: list[tuple[str, str]],
-    system_prompt: str
-) -> int:
-    prompt = get_prompt(message, chat_history, system_prompt)
-    input_ids = tokenizer([prompt], return_tensors='np', add_special_tokens=False)['input_ids']
-    return input_ids.shape[-1]
-
 def run(
     message: str,
     chat_history: list[tuple[str, str]],
-    system_prompt: str,
     max_new_tokens: int = 1024,
     temperature: float = 1.0,
     top_p: float = 0.95,
     top_k: int = 5
 ) -> Iterator[str]:
-
+    model.generation_config.max_new_tokens = max_new_tokens
+    model.generation_config.temperature = temperature
+    model.generation_config.top_p = top_p
+    model.generation_config.top_k = top_k

     history = []
     result=""
@@ -74,15 +51,8 @@ def run(
     for response in model.chat(
         tokenizer,
         history,
-
-        # max_new_tokens=max_new_tokens,
-        # temperature=temperature,
-        # top_p=top_p,
-        # top_k=top_k,
+        stream=True,
     ):
-        print(response)
         result = result + response
         yield result
-
-        # result = result + response["choices"][0]["delta"]["content"]
-        # yield result
+
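Context for the model.py change: the per-request sampling knobs are now written onto the model's generation_config before calling model.chat(tokenizer, history, stream=True), a method supplied by this checkpoint's trust_remote_code modeling code rather than by transformers itself, and run concatenates each streamed chunk into a cumulative result. The sketch below is a hedged reconstruction of that flow with a dummy model so it runs without weights; the message-dict history format is an assumption, since the lines that build history fall outside this diff.

from collections.abc import Iterator
from types import SimpleNamespace


class DummyChatModel:
    # Stand-in for the trust_remote_code checkpoint used by the Space.

    def __init__(self) -> None:
        # Mirrors the GenerationConfig attributes the Space overrides.
        self.generation_config = SimpleNamespace(
            max_new_tokens=1024, temperature=1.0, top_p=0.95, top_k=5,
        )

    def chat(self, tokenizer, history, stream: bool = False) -> Iterator[str]:
        # The real remote-code .chat reads self.generation_config; the
        # Space's loop treats each yielded chunk as a delta to append.
        for chunk in ('Hello', ', ', 'world', '!'):
            yield chunk


model = DummyChatModel()
tokenizer = None  # stand-in; unused by the dummy


def run(
    message: str,
    chat_history: list[tuple[str, str]],
    max_new_tokens: int = 1024,
    temperature: float = 1.0,
    top_p: float = 0.95,
    top_k: int = 5,
) -> Iterator[str]:
    # Per-request settings go through generation_config because this
    # .chat signature takes no sampling kwargs.
    model.generation_config.max_new_tokens = max_new_tokens
    model.generation_config.temperature = temperature
    model.generation_config.top_p = top_p
    model.generation_config.top_k = top_k

    # Assumed history format; the real construction is outside the diff.
    history = []
    for user, assistant in chat_history:
        history.append({'role': 'user', 'content': user})
        history.append({'role': 'assistant', 'content': assistant})
    history.append({'role': 'user', 'content': message})

    result = ''
    for response in model.chat(tokenizer, history, stream=True):
        result = result + response  # accumulate streamed chunks
        yield result


for partial in run('Hi!', []):
    print(partial)

One trade-off worth noting: generation_config is shared state on the model, so two concurrent requests can overwrite each other's sampling settings; passing per-call kwargs (where the remote-code chat supports them) avoids that race.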