#!/usr/bin/python3
# -*- coding: utf-8 -*-
| """ | |
| https://cloud.siliconflow.cn/sft-d1rosn8o8n4s73ftpa1g/playground/chat/17885302852 | |
| https://docs.siliconflow.cn/cn/userguide/capabilities/reasoning | |
| Model Name: | |
| Pro/deepseek-ai/DeepSeek-R1 | |
| Tips: | |
| (1)thinking_budget: Must be greater than or equal to 1 | |
| (2)The selected model requires paid balance. Your paid balance is insufficient. Please top up and try again. | |
| Model Name: | |
| tencent/Hunyuan-A13B-Instruct | |
| Tips: | |
| (1)它在回答时总是会先思考,最后给出答案.这适合知识问答,但不符合我们Agent的需求. 后来强制其只能输出 A-E 中的一个字符(max_tokens=4),以完成评估. | |
| max_tokens=4, | |
| logit_bias={ | |
| 32: 100, | |
| 33: 100, | |
| 34: 100, | |
| 35: 100, | |
| 36: 100, | |
| 37: 100, | |
| }, | |
| Model Name: | |
| deepseek-ai/DeepSeek-R1 | |
| Tips: | |
| (1)为了让它只输出一个字符,设置 max_tokens=3 | |
| Model Name: | |
| Qwen/Qwen3-8B | |
| deepseek-ai/DeepSeek-R1-0528-Qwen3-8B | |
| deepseek-ai/DeepSeek-R1-Distill-Qwen-7B | |
| Tips: | |
| (1)为了让它只输出一个字符,设置 max_tokens=1 | |
| Model Name: | |
| baidu/ERNIE-4.5-300B-A47B | |
| Tips: | |
| (1)它可能使用的是bpe 分词, logit_bias 注释掉。 | |
| """ | |
import argparse
from datetime import datetime
import json
import os
from pathlib import Path
import re
import sys
import time
from zoneinfo import ZoneInfo  # bundled with Python 3.9+, no extra install needed

pwd = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(pwd, "../"))

from openai import OpenAI

from project_settings import environment, project_path
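

# The docstring tips describe forcing a single-letter answer by capping
# max_tokens and biasing the candidate letter tokens. Below is a minimal
# sketch of that trick (not wired into main() as-is): the default ids
# 32-37 are the ones noted above for tencent/Hunyuan-A13B-Instruct and are
# tokenizer specific, so verify them per model; BPE vocabularies such as
# baidu/ERNIE-4.5-300B-A47B use different ids, which is why logit_bias is
# commented out for that model.
def single_choice_kwargs(token_ids=(32, 33, 34, 35, 36, 37), bias=100, max_tokens=4):
    # Pass the result as **kwargs to client.chat.completions.create(...).
    return {
        "max_tokens": max_tokens,
        "logit_bias": {token_id: bias for token_id in token_ids},
    }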


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name",
        # default="Pro/deepseek-ai/DeepSeek-R1",
        # default="tencent/Hunyuan-A13B-Instruct",
        # default="deepseek-ai/DeepSeek-V3",
        # default="deepseek-ai/DeepSeek-R1",
        # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
        # default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        # default="baidu/ERNIE-4.5-300B-A47B",
        # default="Qwen/Qwen3-8B",
        # default="Qwen/Qwen3-14B",
        # default="Qwen/Qwen3-32B",
        # default="Qwen/Qwen3-30B-A3B",
        # default="Qwen/Qwen3-30B-A3B-Instruct-2507",
        # default="Qwen/Qwen3-235B-A22B-Instruct-2507",
        # default="Qwen/QwQ-32B",
        default="Tongyi-Zhiwen/QwenLong-L1-32B",
        type=str
    )
    parser.add_argument(
        "--eval_dataset_name",
        default="agent-bingoplus-ph-200-chat.jsonl",
        # default="agent-lingoace-zh-80-chat.jsonl",
        type=str
    )
    parser.add_argument(
        "--eval_dataset_dir",
        default=(project_path / "data/dataset").as_posix(),
        type=str
    )
    parser.add_argument(
        "--eval_data_dir",
        default=(project_path / "data/eval_data").as_posix(),
        type=str
    )
    parser.add_argument(
        "--client",
        default="shenzhen_sase",
        type=str
    )
    parser.add_argument(
        "--service",
        default="siliconflow_api_key",
        type=str
    )
    parser.add_argument(
        "--create_time_str",
        # default="null",
        default="20250814_134104",
        type=str
    )
    parser.add_argument(
        "--interval",
        default=10,
        type=int
    )
    args = parser.parse_args()
    return args
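

# Hedged sketch for recovering letter token ids on a new model: this assumes
# the model's tokenizer is published on Hugging Face and loadable via
# transformers.AutoTokenizer (an extra dependency), and that each letter
# encodes to exactly one token. BPE vocabularies may violate that, so
# inspect the result before wiring it into logit_bias.
def choice_token_ids(tokenizer_name, letters="ABCDE", bias=100):
    from transformers import AutoTokenizer  # imported here: optional dependency
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, trust_remote_code=True)
    result = dict()
    for letter in letters:
        ids = tokenizer.encode(letter, add_special_tokens=False)
        if len(ids) == 1:
            result[ids[0]] = bias
    return result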


def main():
    args = get_args()

    eval_dataset_dir = Path(args.eval_dataset_dir)
    eval_dataset_dir.mkdir(parents=True, exist_ok=True)
    eval_data_dir = Path(args.eval_data_dir)
    eval_data_dir.mkdir(parents=True, exist_ok=True)

    if args.create_time_str == "null":
        tz = ZoneInfo("Asia/Shanghai")
        now = datetime.now(tz)
        create_time_str = now.strftime("%Y%m%d_%H%M%S")
        # create_time_str = "20250724_090615"
    else:
        create_time_str = args.create_time_str

    eval_dataset = eval_dataset_dir / args.eval_dataset_name
    model_name_ = args.model_name.replace("/", "#")
    output_file = eval_data_dir / f"siliconflow/siliconflow/{model_name_}/{args.client}/{args.service}/{create_time_str}/{args.eval_dataset_name}.raw"
    output_file.parent.mkdir(parents=True, exist_ok=True)

    api_key = environment.get(args.service, dtype=str)
    client = OpenAI(
        base_url="https://api.siliconflow.cn/v1/",
        # The API key is read from project settings (see --service).
        api_key=api_key
    )

    total = 0
    # Resume support: collect the indices already written to the output file
    # so a restarted run skips them.
    finished_idx_set = set()
    if os.path.exists(output_file.as_posix()):
        with open(output_file.as_posix(), "r", encoding="utf-8") as f:
            for row in f:
                row = json.loads(row)
                idx = row["idx"]
                total = row["total"]
                finished_idx_set.add(idx)
    print(f"finished count: {len(finished_idx_set)}")

    with open(eval_dataset.as_posix(), "r", encoding="utf-8") as fin, \
            open(output_file.as_posix(), "a+", encoding="utf-8") as fout:
        for row in fin:
            row = json.loads(row)
            idx = row["idx"]
            prompt = row["prompt"]
            response = row["response"]

            if idx in finished_idx_set:
                continue
            finished_idx_set.add(idx)

            # The prompt is "<system prompt>\n\n<conversation>". Split on the
            # last blank line (assuming the conversation block itself contains
            # none) so blank lines inside the system prompt survive.
            system_prompt, conversation = prompt.rsplit("\n\n", maxsplit=1)
            system_prompt = system_prompt.strip()
            conversation = conversation.strip()

            # Each turn is "Client: ..." or "Assistant: ..."; the lookahead
            # ends a turn at the start of the next speaker label.
            pattern = r"^(Client|Assistant): (.*?)(?=\n(?:Client|Assistant):)"
            match = re.findall(pattern=pattern, string=conversation, flags=re.I | re.DOTALL | re.MULTILINE)

            messages_ = list()
            for m in match:
                role = m[0].lower()
                content = m[1]
                if role == "client":
                    role = "user"
                elif role != "assistant":
                    raise AssertionError(f"unexpected role: {role}")
                messages_.append({
                    "role": role,
                    "content": content
                })
            messages = [
                {"role": "system", "content": system_prompt},
                *messages_
            ]
            try:
                # Throttle requests to stay under the rate limit.
                time.sleep(args.interval)
                print(f"sleep: {args.interval}")
                time_begin = time.time()
                completion = client.chat.completions.create(
                    model=args.model_name,
                    messages=messages,
                    stream=False,
                    max_tokens=4096,
                    # max_tokens=1,
                    temperature=0.6,
                    top_p=0.95,
                    # logit_bias={
                    #     32: 100,
                    #     33: 100,
                    #     34: 100,
                    #     35: 100,
                    #     36: 100,
                    #     37: 100,
                    #     38: 100,
                    #     39: 100,
                    # },
                    extra_body={
                        # Keep the reasoning budget minimal on models that support it.
                        "thinking_budget": 1
                    }
                )
                time_cost = time.time() - time_begin
                print(f"time_cost: {time_cost}")
            except Exception as e:
                print(f"request failed, error type: {type(e)}, error text: {str(e)}")
                continue

            prediction = completion.choices[0].message.content

            total += 1
            row_ = {
                "idx": idx,
                "prompt": prompt,
                "response": response,
                "prediction": prediction,
                "total": total,
                "time_cost": time_cost,
            }
            row_ = json.dumps(row_, ensure_ascii=False)
            # Flush after every row so an interrupted run can resume cleanly.
            fout.write(f"{row_}\n")
            fout.flush()
    return
| if __name__ == "__main__": | |
| main() | |