import copy
import os
from pathlib import Path
from unittest.mock import patch

import pytest
import torch
from transformers import AutoModelForCausalLM, AutoModelForImageClassification

from peft import LoraConfig, get_peft_model
from peft.tuners.lora import ArrowConfig, create_arrow_model
from peft.tuners.lora.arrow import _resolve_adapter_source
from tests.testing_utils import hub_online_once


@pytest.fixture(scope="module")
def workdir(tmp_path_factory):
    """
    Create a temp directory and chdir into it for the duration of the module.
    """
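    # Run the module from a scratch directory so any files the tests write
    # stay isolated from the repository tree.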
    wd = tmp_path_factory.mktemp("arrow_workdir")
    old_cwd = os.getcwd()
    os.chdir(wd)
    yield Path(wd)
    os.chdir(old_cwd)


def _create_and_save_adapter(out_dir: Path, rank: int = 4):
    """Helper: build a LoRA adapter on a tiny GPT-2 base model and save it into `out_dir`."""
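    # GPT-2 implements its attention projection `c_attn` as a transformers Conv1D,
    # which stores the weight transposed; fan_in_fan_out=True tells LoRA to handle that.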
    cfg = LoraConfig(r=rank, target_modules=["c_attn"], fan_in_fan_out=True, init_lora_weights=False)
    model_id = "hf-internal-testing/tiny-random-gpt2"
    with hub_online_once(model_id):
        model = AutoModelForCausalLM.from_pretrained(model_id)
    peft_model = get_peft_model(model, cfg)
    peft_model.save_pretrained(out_dir)


@pytest.fixture(scope="module")
def ts_adapters(workdir: Path):
    """
    Build 3 task-specific adapters and return their absolute paths.
    """
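    # All three adapters use the same default rank, which Arrow requires
    # (see test_incompatible_rank_raises below).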
    abs_paths = []
    for i in range(3):
        sub = f"{workdir}/ts{i}"
        _create_and_save_adapter(sub)
        abs_paths.append(sub)
    return abs_paths


@pytest.fixture(scope="module")
def gen_adapter(workdir: Path):
    """Build one general-knowledge adapter and return its path in a one-element list."""
    sub = f"{workdir}/gen0"
    _create_and_save_adapter(sub)
    return [sub]


class TestArrowRouting:
    def test_incompatible_rank_raises(self, workdir: Path):
        """
        Adding adapters with different ranks must raise a ValueError.
        """
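        # Arrow requires a single shared LoRA rank across experts; mixing r=4
        # and r=8 must therefore be rejected.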
        sub_r4 = workdir / "rank4"
        sub_r8 = workdir / "rank8"
        _create_and_save_adapter(sub_r4, rank=4)
        _create_and_save_adapter(sub_r8, rank=8)

        model_id = "hf-internal-testing/tiny-random-gpt2"
        with hub_online_once(model_id):
            base = AutoModelForCausalLM.from_pretrained(model_id)

        with pytest.raises(ValueError, match=r"rank mismatch"):
            _ = create_arrow_model(
                base_model=base,
                task_specific_adapter_paths=[str(sub_r4), str(sub_r8)],
                arrow_config=ArrowConfig(top_k=1),
            )

    def test_arrow_differs_with_extra_expert(self, ts_adapters):
        """
        Arrow with 2 experts vs Arrow with 3 experts must produce different logits.
        """
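        # Both routers use top_k=2, so the only varying factor is the pool of
        # experts available for scoring; differing logits show that the third
        # expert participates in routing.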
        model_id = "hf-internal-testing/tiny-random-gpt2"
        with hub_online_once(model_id):
            base_model_1 = AutoModelForCausalLM.from_pretrained(model_id)
        base_model_2 = copy.deepcopy(base_model_1)

        cfg_small = ArrowConfig(top_k=2)
        m_small = create_arrow_model(
            base_model=base_model_1,
            task_specific_adapter_paths=ts_adapters[:2],
            arrow_config=cfg_small,
        ).eval()

        cfg_big = ArrowConfig(top_k=2)
        m_big = create_arrow_model(
            base_model=base_model_2,
            task_specific_adapter_paths=ts_adapters,
            arrow_config=cfg_big,
        ).eval()

        x = torch.ones(1, 4, dtype=torch.long)
        assert not torch.allclose(m_small(x).logits, m_big(x).logits)

    def test_arrow_gks_with_load_adapter_later_with_forward(self, ts_adapters, gen_adapter):
        """
        Loading the last expert after creating the Arrow model should produce the same result as loading all the
        experts at once in create_arrow_model(), when the forward pass is called before adding the new adapter.
        """
        model_id = "hf-internal-testing/tiny-random-gpt2"
        with hub_online_once(model_id):
            base_model_1 = AutoModelForCausalLM.from_pretrained(model_id)
        base_model_2 = copy.deepcopy(base_model_1)

        cfg_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42)
        m_big = create_arrow_model(
            base_model=base_model_1,
            task_specific_adapter_paths=ts_adapters,
            general_adapter_paths=gen_adapter,
            arrow_config=cfg_big,
        ).eval()

        cfg_small_later_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42)
        m_small_later_big = create_arrow_model(
            base_model=base_model_2,
            task_specific_adapter_paths=ts_adapters[:2],
            general_adapter_paths=gen_adapter,
            arrow_config=cfg_small_later_big,
        )

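        # Prototypes are built lazily on the first forward pass; calling the
        # model here forces them to be computed for the initial two experts
        # before the third one is added.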
        x = torch.ones(1, 4, dtype=torch.long)
        m_small_later_big(x)

        m_small_later_big.load_adapter(
            model_id=ts_adapters[-1],
            adapter_name="new_added_ts_expert",
        )

        m_small_later_big.set_adapter("new_added_ts_expert")
        x = torch.ones(3, 5, dtype=torch.long)
        m_small_later_big(x)

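        # Switching back to the router must rebuild the prototypes so that the
        # newly added expert is included in routing.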
        m_small_later_big.set_adapter("arrow_router")
        m_small_later_big.eval()

        x = torch.ones(1, 4, dtype=torch.long)
        assert torch.allclose(m_big(x).logits, m_small_later_big(x).logits)

    def test_arrow_with_load_adapter_later_with_forward_activate_new(self, ts_adapters, gen_adapter):
        """
        Loading the last expert after creating the Arrow model and activating it should produce a different result
        compared to the case where arrow_router is active and the model is routing through Arrow.
        """
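        # Here the newly loaded expert stays active for the final comparison,
        # so inference uses that single LoRA rather than Arrow routing and must
        # diverge from the fully routed m_big.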
        model_id = "hf-internal-testing/tiny-random-gpt2"
        with hub_online_once(model_id):
            base_model_1 = AutoModelForCausalLM.from_pretrained(model_id)
        base_model_2 = copy.deepcopy(base_model_1)

        cfg_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42)
        m_big = create_arrow_model(
            base_model=base_model_1,
            task_specific_adapter_paths=ts_adapters,
            general_adapter_paths=gen_adapter,
            arrow_config=cfg_big,
        ).eval()

        cfg_small_later_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42)
        m_small_later_big = create_arrow_model(
            base_model=base_model_2,
            task_specific_adapter_paths=ts_adapters[:2],
            general_adapter_paths=gen_adapter,
            arrow_config=cfg_small_later_big,
        )

        x = torch.ones(1, 4, dtype=torch.long)
        m_small_later_big(x)

        m_small_later_big.load_adapter(
            model_id=ts_adapters[-1],
            adapter_name="new_added_ts_expert",
        )

        m_small_later_big.set_adapter("new_added_ts_expert")
        m_small_later_big.eval()

        x = torch.ones(1, 4, dtype=torch.long)
        assert not torch.allclose(m_big(x).logits, m_small_later_big(x).logits)

    def test_arrow_gks_with_load_adapter_later_without_forward(self, ts_adapters, gen_adapter):
        """
        Loading the last expert after creating the Arrow model should produce the same result as loading all the
        experts at once in create_arrow_model().
        """
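        # No forward pass happens before load_adapter here, so the prototypes
        # are computed only once, after all three experts are present.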
        model_id = "hf-internal-testing/tiny-random-gpt2"
        with hub_online_once(model_id):
            base_model_1 = AutoModelForCausalLM.from_pretrained(model_id)
        base_model_2 = copy.deepcopy(base_model_1)

        cfg_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42)
        m_big = create_arrow_model(
            base_model=base_model_1,
            task_specific_adapter_paths=ts_adapters,
            general_adapter_paths=gen_adapter,
            arrow_config=cfg_big,
        ).eval()

        cfg_small_later_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42)
        m_small_later_big = create_arrow_model(
            base_model=base_model_2,
            task_specific_adapter_paths=ts_adapters[:2],
            general_adapter_paths=gen_adapter,
            arrow_config=cfg_small_later_big,
        )

        m_small_later_big.load_adapter(
            model_id=ts_adapters[-1],
            adapter_name="new_added_ts_expert",
        )
        m_small_later_big.eval()

        x = torch.ones(1, 4, dtype=torch.long)
        assert torch.allclose(m_big(x).logits, m_small_later_big(x).logits)

    def test_genknowsub_changes_output(self, ts_adapters, gen_adapter):
        """
        Arrow+GenKnowSub vs plain Arrow must change logits.
        """
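        # With use_gks=True, the general-knowledge adapter is subtracted from
        # each task expert before routing (GenKnowSub), which must change the
        # resulting logits.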
        model_id = "hf-internal-testing/tiny-random-gpt2"
        with hub_online_once(model_id):
            base_model_1 = AutoModelForCausalLM.from_pretrained(model_id)
        base_model_2 = copy.deepcopy(base_model_1)

        cfg_plain = ArrowConfig(top_k=2)
        m_plain = create_arrow_model(
            base_model=base_model_1,
            task_specific_adapter_paths=ts_adapters,
            arrow_config=cfg_plain,
        ).eval()

        cfg_gks = ArrowConfig(top_k=2, use_gks=True)
        m_gks = create_arrow_model(
            base_model=base_model_2,
            task_specific_adapter_paths=ts_adapters,
            general_adapter_paths=gen_adapter,
            arrow_config=cfg_gks,
        ).eval()

        x = torch.ones(1, 4, dtype=torch.long)
        assert not torch.allclose(m_plain(x).logits, m_gks(x).logits)

    def test_merging_adapters_raise_error_in_arrow(self, ts_adapters):
        """
        Merging/unmerging is not allowed while an ArrowLinearLayer is loaded on the model and active.
        """
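        # Arrow mixes experts dynamically per input, so there is no single
        # static weight delta that could be folded into the base model.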
        model_id = "hf-internal-testing/tiny-random-gpt2"
        with hub_online_once(model_id):
            base_model = AutoModelForCausalLM.from_pretrained(model_id)

        cfg_small = ArrowConfig(top_k=2)
        m_small = create_arrow_model(
            base_model=base_model,
            task_specific_adapter_paths=ts_adapters[:2],
            arrow_config=cfg_small,
        ).eval()

        with pytest.raises(RuntimeError, match=r"Cannot merge an active Arrow router adapter"):
            m_small.merge_and_unload()

    def test_conv2d_targets_raise_typeerror_in_arrow(self, workdir):
        """
        Adapters applied to Conv2d must be rejected by create_arrow_model(), which enforces Linear/Linear4bit-only
        targets.
        """
        model_id = "hf-internal-testing/tiny-random-ResNetForImageClassification"
        with hub_online_once(model_id):
            base = AutoModelForImageClassification.from_pretrained(model_id)

        cfg = LoraConfig(r=4, target_modules=["convolution"], init_lora_weights=False)
        peft_model = get_peft_model(copy.deepcopy(base), cfg)

        conv_dir = workdir / "cv0"
        peft_model.save_pretrained(conv_dir)

        with pytest.raises(TypeError, match=r"LoRA adapters must only target Linear"):
            _ = create_arrow_model(
                base_model=base,
                task_specific_adapter_paths=[str(conv_dir)],
                arrow_config=ArrowConfig(top_k=1),
            )

    def test_arrow_forward_float16_no_autocast_with_merging(self, ts_adapters):
        """
        Run Arrow in float16 with autocast disabled; the forward pass should work, while merge/unmerge operations
        must raise for Arrow models.
        """
        import platform

        try:
            _ = torch.zeros(1, dtype=torch.float16)
        except Exception:
            pytest.skip(reason="Test requires float16 support")

        if platform.system() == "Darwin":
            pytest.skip(reason="macOS does not support multiple ops in float16")

        model_id = "hf-internal-testing/tiny-random-gpt2"
        with hub_online_once(model_id):
            base = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)

        cfg = ArrowConfig(top_k=2)
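        # autocast_adapter_dtype=False keeps the LoRA weights in float16
        # instead of upcasting them to float32, so the whole forward pass runs
        # in half precision.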
        model = create_arrow_model(
            base_model=base,
            task_specific_adapter_paths=ts_adapters,
            arrow_config=cfg,
            autocast_adapter_dtype=False,
            torch_dtype=torch.float16,
        ).eval()

        X = {
            "input_ids": torch.ones(1, 4, dtype=torch.long),
            "attention_mask": torch.ones(1, 4, dtype=torch.long),
        }

        _ = model(**X)

        with pytest.raises(RuntimeError, match=r"Cannot merge an active Arrow router adapter"):
            model.merge_adapter(safe_merge=False)

        with pytest.raises(RuntimeError, match=r"Cannot merge an active Arrow router adapter"):
            _ = model.merge_and_unload()

    def test_prototypes_not_recomputed_on_repeated_forward(self, ts_adapters):
        """
        Repeated calls to forward should not recompute prototypes. We verify by spying on
        ArrowLoraLinearLayer.top_right_singular_vec_from_BA(), which is only called when prototypes are (re)built.
        """
        model_id = "hf-internal-testing/tiny-random-gpt2"
        with hub_online_once(model_id):
            base = AutoModelForCausalLM.from_pretrained(model_id)

        cfg = ArrowConfig(top_k=2)
        model = create_arrow_model(
            base_model=base,
            task_specific_adapter_paths=ts_adapters,
            arrow_config=cfg,
        ).eval()

        arrow_layer = None
        for _, module in model.named_modules():
            if hasattr(module, "lora_arrow") and "arrow_router" in module.lora_arrow:
                arrow_layer = module.lora_arrow["arrow_router"]
                break
        assert arrow_layer is not None, "Arrow router layer not found on model"

        x = torch.ones(1, 4, dtype=torch.long)
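        # Wrap (not replace) the method so behaviour is unchanged while the
        # spy records how often prototypes are computed.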
        with patch.object(
            arrow_layer,
            "top_right_singular_vec_from_BA",
            wraps=arrow_layer.top_right_singular_vec_from_BA,
        ) as spy:
            _ = model(x)
            first_calls = spy.call_count
            assert first_calls == len(arrow_layer.task_adapter_names)

            _ = model(x)
            assert spy.call_count == first_calls


def test_training_updates_when_task_adapter_active(ts_adapters):
    """
    Ensure a simple training step works: compute a dummy loss, backward, and take an optimizer step. Verify that
    task-adapter parameters update.
    """
    model_id = "hf-internal-testing/tiny-random-gpt2"
    with hub_online_once(model_id):
        base = AutoModelForCausalLM.from_pretrained(model_id)

    cfg = ArrowConfig(top_k=2)
    model = create_arrow_model(
        base_model=base,
        task_specific_adapter_paths=ts_adapters[:2],
        arrow_config=cfg,
    )
    model.train()
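    # Activate the first task adapter (the loaded experts are registered as
    # task_0, task_1, ...) so the optimizer step updates its LoRA weights.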
    model.set_adapter("task_0")

    rep_name = None
    for n, _ in model.named_parameters():
        if ".lora_A.task_0.weight" in n:
            rep_name = n
            break
    assert rep_name is not None, "task_0 LoRA A weight not found"
    rep_param = dict(model.named_parameters())[rep_name]
    before = rep_param.detach().clone()

    opt = torch.optim.SGD([p for p in model.parameters() if p.requires_grad], lr=1e-2)

    vocab = model.config.vocab_size
    input_ids = torch.randint(0, vocab, (2, 8))
    attention_mask = torch.ones_like(input_ids)
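    # labels=input_ids yields the standard causal-LM cross-entropy loss; the
    # model shifts the labels internally.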
    opt.zero_grad()
    out = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
    assert hasattr(out, "loss") and out.loss is not None
    out.loss.backward()
    opt.step()

    after = rep_param.detach().clone()
    assert not torch.allclose(before, after), "Active task adapter parameters did not update after optimizer step"


@pytest.mark.parametrize(
    "case",
    [
        "local_root",
        "local_nested",
        "hub_repo",
        "hub_with_sub",
    ],
)
def test_resolve_adapter_source_variants(tmp_path: Path, case: str):
    """
    Ensure `_resolve_adapter_source` correctly handles:
    - Local dir (containing adapter_config.json)
    - Local nested subfolder
    - Hub repo id "user/repo"
    - Hub repo with subfolder "user/repo/sub/folder"
    """
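    # Local directories that exist on disk are returned verbatim with no
    # subfolder; otherwise the first two path components are treated as the
    # Hub repo id and the remainder as the subfolder.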
    if case == "local_root":
        d = tmp_path / "adapter_local_root"
        d.mkdir(parents=True, exist_ok=True)
        (d / "adapter_config.json").write_text("{}")
        model_id, sub = _resolve_adapter_source(str(d))
        assert model_id == str(d)
        assert sub is None

    elif case == "local_nested":
        d = tmp_path / "repo_like" / "sub" / "folder"
        d.mkdir(parents=True, exist_ok=True)
        (d / "adapter_config.json").write_text("{}")
        model_id, sub = _resolve_adapter_source(str(d))
        assert model_id == str(d)
        assert sub is None

    elif case == "hub_repo":
        model_id, sub = _resolve_adapter_source("user/repo")
        assert model_id == "user/repo"
        assert sub is None

    elif case == "hub_with_sub":
        model_id, sub = _resolve_adapter_source("user/repo/sub/folder")
        assert model_id == "user/repo"
        assert sub == "sub/folder"

    else:
        raise AssertionError(f"unknown case: {case}")