dream-s1k-demo / test_regress.sh
况兑
eval: greedy decode + numeric strict; system: force full decimals; regressions: A/B/C/noisy
e45d7fc
raw
history blame contribute delete
414 Bytes
#!/usr/bin/env bash
#
# Regression run: evaluate one adapter with eval_simple.py against three
# dataset variants, printing a label before each evaluation:
#   [A] original set
#   [B] perturbed set
#   [C] perturbed + normalized set
#
# Usage: test_regress.sh [ADAPTER]
#   ADAPTER  value handed to `eval_simple.py --adapter`
#            (default: ./runs/overfit10_gold)
#
# The dataset names, eval_simple.py and the default adapter are all given
# relative to the current working directory, so run this from the directory
# that contains them.
#
# Fail-fast: under `set -e` the first evaluation that exits non-zero aborts
# the script with that status, so the closing message is printed only when
# all three evaluations succeeded.
set -euo pipefail

# Adapter to evaluate; an omitted or empty first argument keeps the default.
readonly AD=${1:-./runs/overfit10_gold}

# Parallel arrays: LABELS[i] is the banner printed before DATASETS[i] runs.
# The banner text is runtime output and is intentionally left untranslated.
readonly -a LABELS=(
  "[A] 原始集"
  "[B] 扰动集"
  "[C] 扰动归一化集"
)
readonly -a DATASETS=(
  subset10.numeric.jsonl
  subset10.perturbed.chat.jsonl
  subset10.perturbed.chat.norm.jsonl
)

for i in "${!DATASETS[@]}"; do
  printf '%s\n' "${LABELS[i]}"
  python eval_simple.py --adapter "$AD" --data "${DATASETS[i]}"
done

# Reached only if every evaluation above exited with status 0.
printf '%s\n' "==> 回归测试跑完"