Spaces:
Paused
Paused
况兑
eval: greedy decode + numeric strict; system: force full decimals; regressions: A/B/C/noisy
e45d7fc
# Regression run: evaluates one adapter with eval_simple.py on three datasets
#   [A] the original set, [B] the perturbed set, [C] the perturbed + normalized set.
# Run with bash (`set -o pipefail` is not available in every /bin/sh), from the
# directory that holds eval_simple.py, the data files and ./runs — all paths
# below are relative to the current working directory.
set -euo pipefail

readonly DATA1=subset10.numeric.jsonl
readonly DATA2=subset10.perturbed.chat.jsonl
readonly DATA3=subset10.perturbed.chat.norm.jsonl
readonly AD=./runs/overfit10_gold

# Fail fast: verify every input exists before starting the first evaluation,
# so a missing path is not discovered only after earlier stages have run.
for required_path in "$AD" "$DATA1" "$DATA2" "$DATA3"; do
  if [[ ! -e "$required_path" ]]; then
    printf 'error: required path not found: %s\n' "$required_path" >&2
    exit 1
  fi
done

#######################################
# Print a stage label, then evaluate the adapter on one dataset.
# Globals:   AD (read)
# Arguments: $1 - label echoed before the run
#            $2 - dataset path passed to eval_simple.py via --data
# Returns:   exit status of eval_simple.py; because of `set -e`, a non-zero
#            status aborts the whole script and later stages are skipped.
#######################################
run_eval() {
  local label=$1
  local dataset=$2
  echo "$label"
  python eval_simple.py --adapter "$AD" --data "$dataset"
}

run_eval "[A] 原始集" "$DATA1"        # label: "original set"
run_eval "[B] 扰动集" "$DATA2"        # label: "perturbed set"
run_eval "[C] 扰动归一化集" "$DATA3"  # label: "perturbed normalized set"

# Reached only when all three evaluations exited with status 0.
echo "==> 回归测试跑完"               # message: "regression tests finished"