# Copyright 2025-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import os
from pathlib import Path
from unittest.mock import patch

import pytest
import torch
from transformers import AutoModelForCausalLM, AutoModelForImageClassification

from peft import LoraConfig, get_peft_model
from peft.tuners.lora import ArrowConfig, create_arrow_model
from peft.tuners.lora.arrow import _resolve_adapter_source
from tests.testing_utils import hub_online_once


# ─── Fixtures ──────────────────────────────────────────────────────────


@pytest.fixture(scope="module")
def workdir(tmp_path_factory):
    """
    Create a temp directory and chdir into it for the duration of the module.
    """
    wd = tmp_path_factory.mktemp("arrow_workdir")
    old_cwd = os.getcwd()
    os.chdir(wd)
    yield Path(wd)
    os.chdir(old_cwd)  # (pytest will auto-delete wd)


def _create_and_save_adapter(out_dir: Path, rank: int = 4):
    """Helper: build a LoRA adapter on a tiny GPT-2 base model and save it into `out_dir`."""
    # fan_in_fan_out is set to True because of the GPT-2 model we use, to avoid a warning
    cfg = LoraConfig(r=rank, target_modules=["c_attn"], fan_in_fan_out=True, init_lora_weights=False)
    model_id = "hf-internal-testing/tiny-random-gpt2"
    with hub_online_once(model_id):
        model = AutoModelForCausalLM.from_pretrained(model_id)
    peft_model = get_peft_model(model, cfg)
    peft_model.save_pretrained(out_dir)


@pytest.fixture(scope="module")
def ts_adapters(workdir: Path):
    """
    Build 3 task-specific adapters and return their absolute paths.
    """
    abs_paths = []
    for i in range(3):
        sub = f"{workdir}/ts{i}"
        _create_and_save_adapter(sub)
        abs_paths.append(sub)
    return abs_paths


@pytest.fixture(scope="module")
def gen_adapter(workdir: Path):
    """Build 1 general-knowledge adapter and return its absolute path in a list."""
    sub = f"{workdir}/gen0"
    _create_and_save_adapter(sub)
    return [sub]  # list because create_arrow_model expects a list


class TestArrowRouting:
    def test_incompatible_rank_raises(self, workdir: Path):
        """
        Adding adapters with different ranks must raise a ValueError.
        """
        # Create two adapters with different ranks targeting the same modules
        sub_r4 = workdir / "rank4"
        sub_r8 = workdir / "rank8"
        _create_and_save_adapter(sub_r4, rank=4)
        _create_and_save_adapter(sub_r8, rank=8)

        model_id = "hf-internal-testing/tiny-random-gpt2"
        with hub_online_once(model_id):
            base = AutoModelForCausalLM.from_pretrained(model_id)

        # Expect create_arrow_model to raise due to rank mismatch
        with pytest.raises(ValueError, match=r"rank mismatch"):
            _ = create_arrow_model(
                base_model=base,
                task_specific_adapter_paths=[str(sub_r4), str(sub_r8)],
                arrow_config=ArrowConfig(top_k=1),
            )

    def test_arrow_differs_with_extra_expert(self, ts_adapters):
        """
        Arrow with 2 experts vs Arrow with 3 experts must produce different logits.
""" # Arrow over first 2 experts model_id = "hf-internal-testing/tiny-random-gpt2" with hub_online_once(model_id): base_model_1 = AutoModelForCausalLM.from_pretrained(model_id) base_model_2 = copy.deepcopy(base_model_1) cfg_small = ArrowConfig(top_k=2) m_small = create_arrow_model( base_model=base_model_1, task_specific_adapter_paths=ts_adapters[:2], arrow_config=cfg_small, ).eval() # Arrow over all 3 experts cfg_big = ArrowConfig(top_k=2) m_big = create_arrow_model( base_model=base_model_2, task_specific_adapter_paths=ts_adapters, arrow_config=cfg_big, ).eval() x = torch.ones(1, 4, dtype=torch.long) assert not torch.allclose(m_small(x).logits, m_big(x).logits) def test_arrow_gks_with_load_adapter_later_with_forward(self, ts_adapters, gen_adapter): """ Loading the last expert after creating the arrow model should produce the same result as loading all the experts at once in create_arrow_model(), when forward path is called before adding the new adapter. """ # Arrow over all three experts model_id = "hf-internal-testing/tiny-random-gpt2" with hub_online_once(model_id): base_model_1 = AutoModelForCausalLM.from_pretrained(model_id) base_model_2 = copy.deepcopy(base_model_1) cfg_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42) m_big = create_arrow_model( base_model=base_model_1, task_specific_adapter_paths=ts_adapters, general_adapter_paths=gen_adapter, arrow_config=cfg_big, ).eval() # Arrow over all 2 experts + loading the third expert later cfg_small_later_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42) m_small_later_big = create_arrow_model( base_model=base_model_2, task_specific_adapter_paths=ts_adapters[:2], general_adapter_paths=gen_adapter, arrow_config=cfg_small_later_big, ) # Ensuring that the prototypes and gks are done one time by running a forward path x = torch.ones(1, 4, dtype=torch.long) m_small_later_big(x) # Now loading the third expert m_small_later_big.load_adapter( model_id=ts_adapters[-1], adapter_name="new_added_ts_expert", ) # Activating the new adapter and run forward path on it m_small_later_big.set_adapter("new_added_ts_expert") x = torch.ones(3, 5, dtype=torch.long) m_small_later_big(x) # Now we switch back to the arrow_router m_small_later_big.set_adapter("arrow_router") m_small_later_big.eval() x = torch.ones(1, 4, dtype=torch.long) assert torch.allclose(m_big(x).logits, m_small_later_big(x).logits) def test_arrow_with_load_adapter_later_with_forward_activate_new(self, ts_adapters, gen_adapter): """ Loading the last expert after creating the arrow model and activate it should produce different result compared to the case where arrow_router is activate, and the model's using arrow. 
""" # Arrow over all three experts model_id = "hf-internal-testing/tiny-random-gpt2" with hub_online_once(model_id): base_model_1 = AutoModelForCausalLM.from_pretrained(model_id) base_model_2 = copy.deepcopy(base_model_1) cfg_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42) m_big = create_arrow_model( base_model=base_model_1, task_specific_adapter_paths=ts_adapters, general_adapter_paths=gen_adapter, arrow_config=cfg_big, ).eval() # Arrow over all 2 experts + loading the third expert later cfg_small_later_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42) m_small_later_big = create_arrow_model( base_model=base_model_2, task_specific_adapter_paths=ts_adapters[:2], general_adapter_paths=gen_adapter, arrow_config=cfg_small_later_big, ) # Ensuring that the prototypes and gks are done one time by running a forward path x = torch.ones(1, 4, dtype=torch.long) m_small_later_big(x) # Now loading the third expert m_small_later_big.load_adapter( model_id=ts_adapters[-1], adapter_name="new_added_ts_expert", ) # The new adapter is activated m_small_later_big.set_adapter("new_added_ts_expert") m_small_later_big.eval() x = torch.ones(1, 4, dtype=torch.long) assert not torch.allclose(m_big(x).logits, m_small_later_big(x).logits) def test_arrow_gks_with_load_adapter_later_without_forward(self, ts_adapters, gen_adapter): """ Loading the last expert after creating the arrow model should produce the same result as loading all the experts at once in create_arrow_model() """ # Arrow over all three experts model_id = "hf-internal-testing/tiny-random-gpt2" with hub_online_once(model_id): base_model_1 = AutoModelForCausalLM.from_pretrained(model_id) base_model_2 = copy.deepcopy(base_model_1) cfg_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42) m_big = create_arrow_model( base_model=base_model_1, task_specific_adapter_paths=ts_adapters, general_adapter_paths=gen_adapter, arrow_config=cfg_big, ).eval() # Arrow over all 2 experts + loading the third expert later cfg_small_later_big = ArrowConfig(top_k=2, use_gks=True, rng_seed=42) m_small_later_big = create_arrow_model( base_model=base_model_2, task_specific_adapter_paths=ts_adapters[:2], general_adapter_paths=gen_adapter, arrow_config=cfg_small_later_big, ) # Now loading the third expert m_small_later_big.load_adapter( model_id=ts_adapters[-1], adapter_name="new_added_ts_expert", ) m_small_later_big.eval() x = torch.ones(1, 4, dtype=torch.long) assert torch.allclose(m_big(x).logits, m_small_later_big(x).logits) def test_genknowsub_changes_output(self, ts_adapters, gen_adapter): """ Arrow+GenKnowSub vs plain Arrow must change logits. """ # Plain Arrow model_id = "hf-internal-testing/tiny-random-gpt2" with hub_online_once(model_id): base_model_1 = AutoModelForCausalLM.from_pretrained(model_id) base_model_2 = copy.deepcopy(base_model_1) cfg_plain = ArrowConfig(top_k=2) m_plain = create_arrow_model( base_model=base_model_1, task_specific_adapter_paths=ts_adapters, arrow_config=cfg_plain, ).eval() # Arrow + GenKnowSub cfg_gks = ArrowConfig(top_k=2, use_gks=True) m_gks = create_arrow_model( base_model=base_model_2, task_specific_adapter_paths=ts_adapters, general_adapter_paths=gen_adapter, arrow_config=cfg_gks, ).eval() x = torch.ones(1, 4, dtype=torch.long) assert not torch.allclose(m_plain(x).logits, m_gks(x).logits) def test_merging_adapters_raise_error_in_arrow(self, ts_adapters): """ Merging/unmerging is not allowed while an ArrowLinearLayer is loaded on the model and active. 
""" # Arrow over first 2 experts model_id = "hf-internal-testing/tiny-random-gpt2" with hub_online_once(model_id): base_model = AutoModelForCausalLM.from_pretrained(model_id) cfg_small = ArrowConfig(top_k=2) m_small = create_arrow_model( base_model=base_model, task_specific_adapter_paths=ts_adapters[:2], arrow_config=cfg_small, ).eval() with pytest.raises(RuntimeError, match=r"Cannot merge an active Arrow router adapter"): m_small.merge_and_unload() def test_conv2d_targets_raise_typeerror_in_arrow(self, workdir): """ Adapters applied to Conv2d must be rejected by create_arrow_model() which enforces Linear/Linear4bit-only targets. """ model_id = "hf-internal-testing/tiny-random-ResNetForImageClassification" with hub_online_once(model_id): base = AutoModelForImageClassification.from_pretrained(model_id) # Build a LoRA adapter targeting a Conv2d cfg = LoraConfig(r=4, target_modules=["convolution"], init_lora_weights=False) peft_model = get_peft_model(copy.deepcopy(base), cfg) conv_dir = workdir / "cv0" peft_model.save_pretrained(conv_dir) # Expect create_arrow_model to raise TypeError with pytest.raises(TypeError, match=r"LoRA adapters must only target Linear"): _ = create_arrow_model( base_model=base, task_specific_adapter_paths=[str(conv_dir)], arrow_config=ArrowConfig(top_k=1), ) def test_arrow_forward_float16_no_autocast_with_merging(self, ts_adapters): """ Run Arrow in float16 with autocast disabled; forward should work, while merge/unmerge operations must raise for Arrow models. """ import platform try: _ = torch.zeros(1, dtype=torch.float16) except Exception: pytest.skip(reason="Test requires float16 support") if platform.system() == "Darwin": pytest.skip(reason="MacOS does not support multiple ops in float16") model_id = "hf-internal-testing/tiny-random-gpt2" # Create base in fp16 (no manual assignment to .dtype) with hub_online_once(model_id): base = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16) cfg = ArrowConfig(top_k=2) # Build Arrow model and disable adapter dtype autocast model = create_arrow_model( base_model=base, task_specific_adapter_paths=ts_adapters, arrow_config=cfg, autocast_adapter_dtype=False, torch_dtype=torch.float16, ).eval() X = { "input_ids": torch.ones(1, 4, dtype=torch.long), "attention_mask": torch.ones(1, 4, dtype=torch.long), } # Forward should work in fp16 _ = model(**X) # Merge must fail on Arrow models with pytest.raises(RuntimeError, match=r"Cannot merge an active Arrow router adapter"): model.merge_adapter(safe_merge=False) with pytest.raises(RuntimeError, match=r"Cannot merge an active Arrow router adapter"): _ = model.merge_and_unload() def test_prototypes_not_recomputed_on_repeated_forward(self, ts_adapters): """ Repeated calls to forward should not recompute prototypes. We verify by spying on ArrowLoraLinearLayer.top_right_singular_vec_from_BA(), which is only called when prototypes are (re)built. 
""" model_id = "hf-internal-testing/tiny-random-gpt2" with hub_online_once(model_id): base = AutoModelForCausalLM.from_pretrained(model_id) cfg = ArrowConfig(top_k=2) model = create_arrow_model( base_model=base, task_specific_adapter_paths=ts_adapters, arrow_config=cfg, ).eval() # Find one Arrow layer instance on the model arrow_layer = None for _, module in model.named_modules(): if hasattr(module, "lora_arrow") and "arrow_router" in module.lora_arrow: arrow_layer = module.lora_arrow["arrow_router"] break assert arrow_layer is not None, "Arrow router layer not found on model" x = torch.ones(1, 4, dtype=torch.long) # Spy on the internal proto computation; should run once (E calls for E experts) with patch.object( arrow_layer, "top_right_singular_vec_from_BA", wraps=arrow_layer.top_right_singular_vec_from_BA, ) as spy: _ = model(x) first_calls = spy.call_count assert first_calls == len(arrow_layer.task_adapter_names) # Call forward again; prototypes should be cached, so no extra calls _ = model(x) assert spy.call_count == first_calls def test_training_updates_when_task_adapter_active(ts_adapters): """ Ensure a simple training step works: compute a dummy loss, backward, and take an optimizer step. Verify that task-adapter parameters update. """ model_id = "hf-internal-testing/tiny-random-gpt2" with hub_online_once(model_id): base = AutoModelForCausalLM.from_pretrained(model_id) # Build Arrow model over two experts cfg = ArrowConfig(top_k=2) model = create_arrow_model( base_model=base, task_specific_adapter_paths=ts_adapters[:2], arrow_config=cfg, ) model.train() # Switch to a specific task adapter for training (vanilla LoRA) model.set_adapter("task_0") # Choose a representative parameter to check updates (task_0 A weight) rep_name = None for n, _ in model.named_parameters(): if ".lora_A.task_0.weight" in n: rep_name = n break assert rep_name is not None, "task_0 LoRA A weight not found" rep_param = dict(model.named_parameters())[rep_name] before = rep_param.detach().clone() # Optimizer over trainable params (task_0 now active and trainable) opt = torch.optim.SGD([p for p in model.parameters() if p.requires_grad], lr=1e-2) # Dummy batch vocab = model.config.vocab_size input_ids = torch.randint(0, vocab, (2, 8)) attention_mask = torch.ones_like(input_ids) # Compute loss and update opt.zero_grad() out = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids) assert hasattr(out, "loss") and out.loss is not None out.loss.backward() opt.step() after = rep_param.detach().clone() assert not torch.allclose(before, after), "Active task adapter parameters did not update after optimizer step" @pytest.mark.parametrize( "case", [ "local_root", "local_nested", "hub_repo", "hub_with_sub", ], ) def test_resolve_adapter_source_variants(tmp_path: Path, case: str): """ Ensure `_resolve_adapter_source` correctly handles: - Local dir (containing adapter_config.json) - Local nested subfolder - Hub repo id "user/repo" - Hub repo with subfolder "user/repo/sub/folder" """ if case == "local_root": d = tmp_path / "adapter_local_root" d.mkdir(parents=True, exist_ok=True) (d / "adapter_config.json").write_text("{}") model_id, sub = _resolve_adapter_source(str(d)) assert model_id == str(d) assert sub is None elif case == "local_nested": d = tmp_path / "repo_like" / "sub" / "folder" d.mkdir(parents=True, exist_ok=True) (d / "adapter_config.json").write_text("{}") model_id, sub = _resolve_adapter_source(str(d)) assert model_id == str(d) assert sub is None elif case == "hub_repo": model_id, sub = 
_resolve_adapter_source("user/repo") assert model_id == "user/repo" assert sub is None elif case == "hub_with_sub": model_id, sub = _resolve_adapter_source("user/repo/sub/folder") assert model_id == "user/repo" assert sub == "sub/folder" else: raise AssertionError(f"unknown case: {case}")