def organize_messages(message, history):
    """Build a chat-completion style message list.

    Args:
        message: The new user message; appended last with role ``"user"``.
        history: Iterable of ``(user, assistant)`` pairs from earlier turns.
            A falsy ``assistant`` entry (``None`` or ``""``) is skipped.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts that starts with a
        fixed system prompt, followed by the history turns and the new
        user message.
    """
    msg_ls = [{"role": "system", "content": "You are a helpful assistant."}]
    for user, assistant in history:
        msg_ls.append({"role": "user", "content": user})
        if assistant:
            msg_ls.append({"role": "assistant", "content": assistant})
    msg_ls.append({"role": "user", "content": message})
    return msg_ls


def stream2display_text(stream_text, token_per_sec):
    """Render streamed model text, showing the thinking part as a quote block.

    Text that does not start with ``<think>`` is returned unchanged.
    Otherwise the text between ``<think>`` and ``</think>`` is rendered as
    Markdown quote lines (``> ...``), followed by the answer text (if any)
    and a ``token/s`` line, each separated by a blank line.

    Args:
        stream_text: Text decoded so far from the stream.
        token_per_sec: Generation speed, formatted with two decimals.

    Returns:
        The display string.
    """
    think_open = "<think>"
    think_close = "</think>"

    # The stream may arrive with the leading "<" missing; restore it.
    if stream_text.startswith("think>"):
        stream_text = f"<{stream_text}"
    if not stream_text.startswith(think_open):
        return stream_text

    # partition() splits on the FIRST closing tag only, so a stray second
    # "</think>" in the answer cannot break the two-value unpacking, and a
    # missing closing tag simply yields an empty answer.
    think_text, _, result_text = stream_text.partition(think_close)
    think_text = think_text.replace(think_open, "")
    result_text = result_text.replace("<|im_end|>", "")

    think_block = "\n".join(
        f"> {line}" if line else ">"
        for line in think_text.rstrip().splitlines()
    )

    display_text_ls = [think_block]
    if result_text:
        display_text_ls.append(result_text)
    # NOTE(review): the original nesting of this line was not recoverable;
    # the speed is shown whenever the thinking block is rendered -- confirm.
    display_text_ls.append(f"```{token_per_sec:.2f} token/s```")
    return "\n\n".join(display_text_ls)


def mtp_new_tokens(pred_ids, gen_tk_count, existing_tk_count, stop_token_ids):
    """Return the tokens generated since the last call and the running total.

    Args:
        pred_ids: Indexable batch of token-id sequences; only ``pred_ids[0]``
            is read.
        gen_tk_count: Number of generated tokens already handed out.
        existing_tk_count: Number of leading tokens in ``pred_ids[0]`` that
            are not generated output and are skipped.
        stop_token_ids: Container of ids that end generation; may be empty
            or ``None`` to disable truncation.

    Returns:
        ``(new_tokens, total)`` where ``new_tokens`` is the generated output
        past ``gen_tk_count`` (cut before the first stop token) and
        ``total`` is the length of the generated output after truncation.
    """
    output_ids = pred_ids[0][existing_tk_count:]
    if stop_token_ids:
        # Membership is tested against the caller's container as-is (no set
        # conversion): elements may not be hashable by value -- presumably
        # tensor scalars; verify against caller.
        first_stop = next(
            (
                idx
                for idx, token_id in enumerate(output_ids)
                if token_id in stop_token_ids
            ),
            None,
        )
        if first_stop is not None:
            output_ids = output_ids[:first_stop]
    new_tokens = output_ids[gen_tk_count:]
    return new_tokens, len(output_ids)