diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/README.md b/README.md
index 7aa7aa128f7f42afe7b81fed4760c043781c5438..5327721df6ae7471380844f0ab90f29341c52ea9 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,245 @@
----
-title: VLM Lens
-emoji: 💬
-colorFrom: yellow
-colorTo: purple
-sdk: gradio
-sdk_version: 5.42.0
-app_file: app.py
-pinned: false
-hf_oauth: true
-hf_oauth_scopes:
-- inference-api
-license: apache-2.0
----
-
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+# VLM-Lens
+
+[Python 3.10](https://www.python.org/downloads/release/python-31012/)
+[License: Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
+[Documentation](https://compling-wat.github.io/vlm-lens/)
+[Tutorial Notebook](tutorial-notebooks/guide.ipynb)
+[Open in Colab](https://colab.research.google.com/drive/13WC4HA6syXFotmn7S8WsVz4OmoHsfHV9?usp=sharing)
+
+## Table of Contents
+
+- [Environment Setup](#environment-setup)
+- [Example Usage: Extract Qwen2-VL-2B Embeddings with VLM-Lens](#example-usage-extract-qwen2-vl-2b-embeddings-with-vlm-lens)
+ - [General Command-Line Demo](#general-command-line-demo)
+ - [Run Qwen2-VL-2B Embeddings Extraction](#run-qwen2-vl-2b-embeddings-extraction)
+- [Layers of Interest in a VLM](#layers-of-interest-in-a-vlm)
+ - [Retrieving All Named Modules](#retrieving-all-named-modules)
+ - [Matching Layers](#matching-layers)
+- [Feature Extraction using HuggingFace Datasets](#feature-extraction-using-huggingface-datasets)
+- [Output Database](#output-database)
+- [Principal Component Analysis over Primitive Concept](#principal-component-analysis-over-primitive-concept)
+- [Contributing to VLM-Lens](#contributing-to-vlm-lens)
+- [Miscellaneous](#miscellaneous)
+
+## Environment Setup
+We recommend using a virtual environment to manage your dependencies. You can create and activate one with the following commands:
+```bash
+virtualenv --no-download "venv/vlm-lens-base" --prompt "vlm-lens-base" # Or "python3.10 -m venv venv/vlm-lens-base"
+source venv/vlm-lens-base/bin/activate
+```
+
+Then, install the required dependencies:
+```bash
+pip install --upgrade pip
+pip install -r envs/base/requirements.txt
+```
+
+Some models require different dependencies, and we recommend creating a separate virtual environment for each of them to avoid conflicts.
+For such models, we provide a separate `requirements.txt` file under `envs/<model-name>/requirements.txt`, which can be installed in the same way as above.
+All model-specific environments are independent of the base environment and can be installed individually.
+
+**Notes**:
+1. There may be local constraints (e.g., cluster-specific policies) that cause the above commands to fail. In such cases, feel free to adapt them as needed. We welcome issues and pull requests to help us keep the dependencies up to date.
+2. Some models, due to the resources available at development time, may not be fully supported on modern GPUs. Our released environments are tested on L40S GPUs; we recommend following the error messages to adjust the environment setup for your specific hardware.
+
+## Example Usage: Extract Qwen2-VL-2B Embeddings with VLM-Lens
+
+### General Command-Line Demo
+
+The general command to run the quick command-line demo is:
+```bash
+python -m src.main \
+    --config <path-to-config-file> \
+ --debug
+```
+with an optional debug flag to see more detailed outputs.
+
+Note that the config file should be in YAML format, and that any arguments you want to pass to the Hugging Face API should be placed under the `model` key.
+See `configs/models/qwen/qwen-2b.yaml` as an example.
+
+### Run Qwen2-VL-2B Embeddings Extraction
+The file `configs/models/qwen/qwen-2b.yaml` contains the configuration for running the Qwen2-VL-2B model.
+
+```yaml
+architecture: qwen # Architecture of the model, see more options in src/models/configs.py
+model_path: Qwen/Qwen2-VL-2B-Instruct # HuggingFace model path
+model: # Model configuration, i.e., arguments to pass to the model
+ - torch_dtype: auto
+output_db: output/qwen.db # Output database file to store embeddings
+input_dir: ./data/ # Directory containing images to process
+prompt: "Describe the color in this image in one word." # Textual prompt
+pooling_method: None # Pooling method to use for aggregating token embeddings over tokens (options: None, mean, max)
+modules: # List of modules to extract embeddings from
+ - lm_head
+ - visual.blocks.31
+```
+
+To run the extraction on an available GPU, use the following command:
+```bash
+python -m src.main --config configs/models/qwen/qwen-2b.yaml --debug
+```
+
+If there is no GPU available, you can run it on CPU with:
+```bash
+python -m src.main --config configs/models/qwen/qwen-2b.yaml --device cpu --debug
+```
+
+## Layers of Interest in a VLM
+### Retrieving All Named Modules
+Unfortunately, there is no way to find out which layers are available to match against without loading the model, which can take a considerable amount of time.
+
+Instead, we offer some cached results under `logs/` for each model, which were generated by passing the `-l` or `--log-named-modules` flag when running `python -m src.main`.
+
+When using this flag, you only need to set the architecture and the HuggingFace model path; no modules need to be specified.
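+
+Independent of the `--log-named-modules` flag, the listing below is a minimal sketch of what such a log amounts to. The model path is only an example (it requires a `transformers` version with Qwen2-VL support), and loading the full checkpoint is exactly the cost the cached logs let you skip.
+```python
+# Minimal sketch: enumerate the named modules of a HuggingFace model so you
+# can pick layers to hook. The model path is only an example.
+from transformers import Qwen2VLForConditionalGeneration
+
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    "Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto"
+)
+for name, _ in model.named_modules():
+    print(name)
+```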
+
+### Matching Layers
+To specify which layers to match, use Unix-style patterns, where `*` denotes a wildcard.
+
+For example, to match the query projection layer of every attention layer in Qwen, simply add the following lines to the YAML file:
+```yaml
+modules:
+ - model.layers.*.self_attn.q_proj
+```
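+
+As a rough illustration (not necessarily the project's exact matching code), the pattern above can be checked against module names with Python's standard `fnmatch`:
+```python
+# Minimal sketch of Unix-style wildcard matching over module names.
+from fnmatch import fnmatch
+
+named_modules = [
+    "model.layers.0.self_attn.q_proj",
+    "model.layers.0.self_attn.k_proj",
+    "model.layers.1.self_attn.q_proj",
+]
+pattern = "model.layers.*.self_attn.q_proj"
+matched = [name for name in named_modules if fnmatch(name, pattern)]
+print(matched)  # keeps only the q_proj modules
+```
+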
+## Feature Extraction using HuggingFace Datasets
+To use VLM-Lens with either hosted or local datasets, there are several methods available, depending on where the input images are located.
+
+First, your dataset must be standardized to a format that includes the attributes `prompt`, `label`, and `image_path`. Here is a snippet of the `compling/coco-val2017-obj-qa-categories` dataset, adjusted to include these attributes:
+
+| id | prompt | label | image_path |
+|---|---|---|---|
+| 397133 | Is this A photo of a dining table on the bottom | yes | /path/to/397133.png |
+| 37777 | Is this A photo of a dining table on the top | no | /path/to/37777.png |
+
+This can be achieved manually or using the helper script in `scripts/map_datasets.py`.
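+
+If you prefer to do the mapping yourself, the snippet below is a hypothetical sketch using the `datasets` library; the source column names (`question`, `answer`, `id`) are assumptions, and `scripts/map_datasets.py` remains the supported route.
+```python
+# Hypothetical sketch: standardize a dataset to the prompt/label/image_path schema.
+from datasets import load_dataset
+
+ds = load_dataset("compling/coco-val2017-obj-qa-categories", split="val2017")
+
+def to_schema(example):
+    return {
+        "prompt": example["question"],                  # assumed source column
+        "label": example["answer"],                     # assumed source column
+        "image_path": f"/path/to/{example['id']}.png",  # adjust to your local layout
+    }
+
+ds = ds.map(to_schema)
+```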
+
+### Method 1: Using hosted datasets
+If you are using datasets hosted on a platform such as HuggingFace, you will either use images that are also *hosted*, or ones that are *downloaded locally* with an identifier to map back to the hosted dataset (e.g., filename).
+
+You must use the `dataset_path` attribute in your configuration file with the appropriate `dataset_split` (if it exists, otherwise leave it out).
+
+#### 1(a): Hosted Dataset with Hosted Images
+```yaml
+dataset:
+ - dataset_path: compling/coco-val2017-obj-qa-categories
+ - dataset_split: val2017
+```
+
+#### 1(b): Hosted Dataset with Local Images
+
+> 🚨 **NOTE**: The `image_path` attribute in the dataset must contain either filenames or relative paths, such that a cell value of `train/00023.png` can be joined with `image_dataset_path` to form the full absolute path: `/path/to/local/images/train/00023.png`. If the `image_path` attribute does not require any additional path joining, you can leave out the `image_dataset_path` attribute.
+
+```yaml
+dataset:
+ - dataset_path: compling/coco-val2017-obj-qa-categories
+ - dataset_split: val2017
+ - image_dataset_path: /path/to/local/images # downloaded using configs/dataset/download-coco.yaml
+```
+
+
+### Method 2: Using local datasets
+#### 2(a): Local Dataset containing Image Files
+```yaml
+dataset:
+ - local_dataset_path: /path/to/local/CLEVR
+ - dataset_split: train # leave out if unspecified
+```
+
+#### 2(b): Local Dataset with Separate Input Image Directory
+
+> 🚨 **NOTE**: The `image_path` attribute in the dataset must contain either filenames or relative paths, such that a cell value of `train/00023.png` can be joined with `image_dataset_path` to form the full absolute path: `/path/to/local/images/train/00023.png`. If the `image_path` attribute does not require any additional path joining, you can leave out the `image_dataset_path` attribute.
+
+```yaml
+dataset:
+ - local_dataset_path: /path/to/local/CLEVR
+ - dataset_split: train # leave out if unspecified
+ - image_dataset_path: /path/to/local/CLEVR/images
+```
+
+## Output Database
+The output database is specified by the `-o` or `--output-db` flag. It contains a single SQL table named `tensors` with the following columns (a query sketch follows the column descriptions):
+```
+name, architecture, timestamp, image_path, prompt, label, layer, pooling_method, tensor_dim, tensor
+```
+where each column is defined as follows:
+1. `name` represents the model path from HuggingFace.
+2. `architecture` is one of the supported architecture values listed above.
+3. `timestamp` is the time at which the model was run.
+4. `image_path` is the absolute path to the image.
+5. `prompt` stores the prompt used in that instance.
+6. `label` is an optional cell that stores the "ground-truth" answer, which is helpful in use cases such as classification.
+7. `layer` is the matched layer from `model.named_modules()`.
+8. `pooling_method` is the pooling method used to aggregate token embeddings.
+9. `tensor_dim` is the dimension of the tensor saved.
+10. `tensor` is the embedding saved.
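+
+The rows can be inspected directly with `sqlite3`; the sketch below assumes the `tensor` column holds raw float32 bytes, so adapt the decoding to however your build serializes embeddings.
+```python
+# Minimal sketch: read a few rows back from the output database.
+import sqlite3
+
+import numpy as np
+
+conn = sqlite3.connect("output/qwen.db")
+rows = conn.execute(
+    "SELECT name, layer, tensor_dim, tensor FROM tensors LIMIT 5"
+).fetchall()
+for name, layer, tensor_dim, blob in rows:
+    vec = np.frombuffer(blob, dtype=np.float32)  # assumed encoding
+    print(name, layer, tensor_dim, vec.shape)
+conn.close()
+```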
+
+## Principal Component Analysis over Primitive Concept
+
+### Data Collection
+
+Download license-free images for primitive concepts (e.g., colors):
+
+```bash
+pip install -r data/concepts/requirements.txt
+python -m data.concepts.download --config configs/concepts/colors.yaml
+```
+
+### Embedding Extraction
+
+Run the LLaVA model to obtain embeddings of the concept images:
+
+```bash
+python -m src.main --config configs/models/llava-7b/llava-7b-concepts-colors.yaml --device cuda
+```
+
+Also, run the LLaVA model to obtain embeddings of the test images:
+
+```bash
+python -m src.main --config configs/models/llava-7b/llava-7b.yaml --device cuda
+```
+
+### Run PCA
+
+Several PCA-based analysis scripts are provided:
+```bash
+pip install -r src/concepts/requirements.txt
+python -m src.concepts.pca
+python -m src.concepts.pca_knn
+python -m src.concepts.pca_separation
+```
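+
+For intuition, the sketch below shows the kind of analysis these scripts perform: fit a PCA on concept embeddings and project test embeddings into the same low-dimensional space. The arrays here are placeholders for vectors loaded from the output databases, not the scripts' exact pipeline.
+```python
+# Illustrative sketch: PCA over concept embeddings, then project test embeddings.
+import numpy as np
+from sklearn.decomposition import PCA
+
+concept_embeddings = np.random.randn(200, 4096)  # placeholder for DB-loaded vectors
+test_embeddings = np.random.randn(20, 4096)      # placeholder for DB-loaded vectors
+
+pca = PCA(n_components=2)
+concept_2d = pca.fit_transform(concept_embeddings)
+test_2d = pca.transform(test_embeddings)
+print(concept_2d.shape, test_2d.shape)
+```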
+
+## Contributing to VLM-Lens
+
+We welcome contributions to VLM-Lens! If you have suggestions, improvements, or bug fixes, please consider submitting a pull request; we actively review them.
+
+We generally follow the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html) to ensure readability, with a few exceptions stated in `.flake8`.
+We use pre-commit hooks to ensure code quality and consistency. Please make sure to run the following commands before committing:
+```bash
+pip install pre-commit
+pre-commit install
+```
+
+
+## Miscellaneous
+
+### Using a Cache
+To use a specific cache directory, set the `HF_HOME` environment variable as follows:
+```bash
+HF_HOME=./cache/ python -m src.main --config configs/models/clip/clip.yaml --debug
+```
+
+
+### Using Submodule-Based Models
+There are some models that require separate submodules to be cloned, such as Glamm.
+To use these models, please follow the instructions below to download the submodules.
+
+#### Glamm
+For Glamm (GroundingLMM), you need to clone its submodules, which can be done with the following command:
+```bash
+git submodule update --recursive --init
+```
+
+See [our document](https://compling-wat.github.io/vlm-lens/tutorials/grounding-lmm.html) for details on the installation.
diff --git a/app.py b/app.py
index bec204167857e75cae4ce3a9355e169be2409d68..296c41e44380365f63aff92963dabbbc9fa92e36 100644
--- a/app.py
+++ b/app.py
@@ -1,70 +1,7 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-
-
-def respond(
- message,
- history: list[dict[str, str]],
- system_message,
- max_tokens,
- temperature,
- top_p,
- hf_token: gr.OAuthToken,
-):
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
- messages = [{"role": "system", "content": system_message}]
-
- messages.extend(history)
-
- messages.append({"role": "user", "content": message})
-
- response = ""
-
- for message in client.chat_completion(
- messages,
- max_tokens=max_tokens,
- stream=True,
- temperature=temperature,
- top_p=top_p,
- ):
- choices = message.choices
- token = ""
- if len(choices) and choices[0].delta.content:
- token = choices[0].delta.content
-
- response += token
- yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
- respond,
- type="messages",
- additional_inputs=[
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
- gr.Slider(
- minimum=0.1,
- maximum=1.0,
- value=0.95,
- step=0.05,
- label="Top-p (nucleus sampling)",
- ),
- ],
-)
-
-with gr.Blocks() as demo:
- with gr.Sidebar():
- gr.LoginButton()
- chatbot.render()
+"""Entry point for the VLM-Lens Gradio demo (see demo/launch_gradio.py)."""
+from demo.launch_gradio import create_demo
+
+demo = create_demo()
if __name__ == "__main__":
demo.launch()
diff --git a/configs/concepts/colors.yaml b/configs/concepts/colors.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..33c7763cecfac975134bf8f80c139e57839e3924
--- /dev/null
+++ b/configs/concepts/colors.yaml
@@ -0,0 +1,69 @@
+# Creative Commons Image Downloader Configuration
+# This file defines the configuration for downloading images
+
+# Basic Settings
+num_images: 10
+normalize_size: 256
+data_dir: "./data/images/colors/"
+results_filename: "colors.json"
+
+# Search Provider Configuration
+# Options: google, bing, both
+search_provider: "both"
+retrieve_multiplier: 10
+
+# Image Processing Settings
+image_format: "JPEG" # JPEG, PNG, WEBP
+image_quality: 90
+load_truncated_images: true
+
+# Download Settings
+max_size_mb: 10
+timeout: 15
+
+# Deduplication Settings
+enable_deduplication: true
+similarity_threshold: 5
+
+# Rate Limiting (seconds)
+request_delay_min: 0.5
+request_delay_max: 1.5
+
+# Search Queries - Color Concepts
+queries:
+ - "red"
+ - "blue"
+ - "yellow"
+ - "green"
+ - "orange"
+ - "purple"
+ - "white"
+ - "black"
+ - "gray"
+ - "silver"
+ - "gold"
+ - "pink"
+ - "brown"
+ - "beige"
+ - "crimson"
+ - "maroon"
+ - "cyan"
+ - "turquoise"
+ - "violet"
+ - "magenta"
+
+# User Agent Rotation
+user_agents:
+ - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+ - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+ - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+
+# Direct Image Extensions
+direct_img_extensions:
+ - ".jpg"
+ - ".jpeg"
+ - ".png"
+ - ".gif"
+ - ".webp"
+ - ".bmp"
+ - ".tiff"
diff --git a/configs/dataset/download-clevr.yaml b/configs/dataset/download-clevr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c26453ce42972d85ef8ad836d80d5ba2f4b10837
--- /dev/null
+++ b/configs/dataset/download-clevr.yaml
@@ -0,0 +1,6 @@
+dataset_path: compling/CLEVR_val_categories
+split_name: [color, shape, material, size, boolean, number]
+parent_folder: CLEVR
+dataset_download_place: data/CLEVR/CLEVR_dataset
+img_url: https://dl.fbaipublicfiles.com/clevr/CLEVR_v1.0.zip
+img_download_place: data/CLEVR/CLEVR_v1.0.zip
diff --git a/configs/dataset/download-coco.yaml b/configs/dataset/download-coco.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b78ecfe997c53c4b3811a55afe8bebc047161870
--- /dev/null
+++ b/configs/dataset/download-coco.yaml
@@ -0,0 +1,6 @@
+dataset_path: compling/coco-val2017-obj-qa-categories
+split_name: val2017
+parent_folder: COCO
+dataset_download_place: data/COCO/COCO_dataset
+img_url: http://images.cocodataset.org/zips/val2017.zip
+img_download_place: data/COCO/val2017.zip
diff --git a/configs/dataset/download-gqa.yaml b/configs/dataset/download-gqa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d4ee17e6d984dd8825d52fca2e59fc662296647b
--- /dev/null
+++ b/configs/dataset/download-gqa.yaml
@@ -0,0 +1,6 @@
+dataset_path: compling/GQA_dataset_binary
+split_name: val
+parent_folder: GQA
+dataset_download_place: data/GQA/GQA_dataset
+img_url: http://downloads.cs.stanford.edu/nlp/data/gqa/images.zip
+img_download_place: data/GQA/images.zip
diff --git a/configs/models/aya-vision-8b/aya-vision-8b.yaml b/configs/models/aya-vision-8b/aya-vision-8b.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a50c2b369defa5621f432f1b07eda10fb28ffe34
--- /dev/null
+++ b/configs/models/aya-vision-8b/aya-vision-8b.yaml
@@ -0,0 +1,10 @@
+architecture: aya-vision
+model_path: CohereLabs/aya-vision-8b
+model:
+ - torch_dtype: float16
+output_db: output/aya-vision.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - language_model.model.layers.15.input_layernorm
+ - language_model.model.layers.31.input_layernorm
diff --git a/configs/models/blip2/blip2.yaml b/configs/models/blip2/blip2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..22292d3bcff927aa64e6ace45c7dd9579123468b
--- /dev/null
+++ b/configs/models/blip2/blip2.yaml
@@ -0,0 +1,10 @@
+architecture: blip2
+model_path: Salesforce/blip2-opt-2.7b
+model:
+ - torch_dtype: auto
+output_db: output/blip2.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - language_model.lm_head
+ - vision_model.post_layernorm
diff --git a/configs/models/clip/clip.yaml b/configs/models/clip/clip.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6bb2132927133b82643e1f785237f7445c465ea1
--- /dev/null
+++ b/configs/models/clip/clip.yaml
@@ -0,0 +1,10 @@
+architecture: clip
+model_path: openai/clip-vit-base-patch32
+model:
+ - torch_dtype: auto
+output_db: output/clip.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - visual_projection
+ - text_projection
diff --git a/configs/models/cogvlm/cogvlm-chat.yaml b/configs/models/cogvlm/cogvlm-chat.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3871b5bffb2517a01af03b848a961036c073ecce
--- /dev/null
+++ b/configs/models/cogvlm/cogvlm-chat.yaml
@@ -0,0 +1,17 @@
+architecture: cogvlm
+model_path: THUDM/cogvlm-chat-hf
+model:
+ - low_cpu_mem_usage: True
+ - trust_remote_code: True
+ - tokenizer_path: lmsys/vicuna-7b-v1.5
+ - legacy: True
+ - template_version: vqa
+forward:
+ - max_new_tokens: 2048
+ - do_sample: False
+output_db: output/cogvlm.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - model.vision.transformer.layers.62
+ - lm_head
diff --git a/configs/models/glamm/glamm.yaml b/configs/models/glamm/glamm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d5bd7e7c72f991d1993fde30a830c21904f5a16
--- /dev/null
+++ b/configs/models/glamm/glamm.yaml
@@ -0,0 +1,19 @@
+architecture: glamm
+model_path: MBZUAI/GLaMM-FullScope
+model:
+ - image_size: 1024
+ - model_max_length: 1536
+ - lora_r: 8
+ - vision_tower: "openai/clip-vit-large-patch14-336"
+ - local_rank: 0
+ - use_mm_start_end: true
+ - conv_type: "llava_v1"
+forward:
+ - max_new_tokens: 1
+vis_save_path: "./vis_output"
+output_db: glamm.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - model.layers.15.post_attention_layernorm
+ - model.layers.31.post_attention_layernorm
diff --git a/configs/models/internlm-xc/internlm-xc.yaml b/configs/models/internlm-xc/internlm-xc.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..dd8552eb63295242a30392f7ad3efe46412764fb
--- /dev/null
+++ b/configs/models/internlm-xc/internlm-xc.yaml
@@ -0,0 +1,13 @@
+architecture: internlm-xcomposer
+model_path: internlm/internlm-xcomposer2d5-7b
+model:
+ - torch_dtype: auto
+ - attn_implementation: eager
+forward:
+ - max_new_tokens: 1
+output_db: output/internlm.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - model.layers.31
+ - vit.vision_tower.vision_model.encoder.layers.23
diff --git a/configs/models/internvl/internvl.yaml b/configs/models/internvl/internvl.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1592a36db826c8dc67d074f0c6f3e0979159719
--- /dev/null
+++ b/configs/models/internvl/internvl.yaml
@@ -0,0 +1,14 @@
+architecture: internvl
+model_path: OpenGVLab/InternVL2_5-8B
+model:
+ - torch_dtype: bfloat16
+ - trust_remote_code: True
+forward:
+ - max_new_tokens: 1
+ - do_sample: True
+output_db: output/internvl.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - vision_model.encoder.layers.23
+ - language_model.output
diff --git a/configs/models/janus/janus-pro-1b.yaml b/configs/models/janus/janus-pro-1b.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ec21c63de79ab001f16d9485d4c0398c5140262a
--- /dev/null
+++ b/configs/models/janus/janus-pro-1b.yaml
@@ -0,0 +1,14 @@
+architecture: janus
+model_path: deepseek-community/Janus-Pro-1B
+model:
+ - attn_implementation: eager
+forward:
+ - max_new_tokens: 1
+ - do_sample: True
+ - generation_mode: 'text'
+output_db: output/janus.db
+input_dir: ./data
+prompt: "Describe the color in this image in one word."
+modules:
+ - model.language_model.layers.11.post_attention_layernorm
+ - model.language_model.layers.23.post_attention_layernorm
diff --git a/configs/models/llava-7b/llava-7b-clevr.yaml b/configs/models/llava-7b/llava-7b-clevr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4c62512065548b46cf38b14f1bc3622fbde97241
--- /dev/null
+++ b/configs/models/llava-7b/llava-7b-clevr.yaml
@@ -0,0 +1,10 @@
+architecture: llava
+model_path: llava-hf/llava-1.5-7b-hf
+dataset:
+ - dataset_path: compling/CLEVR_categories
+ - dataset_split: boolean
+output_db: output/llava-boolean.db
+pooling_method: mean
+modules:
+ - language_model.model.layers.16.post_attention_layernorm
+ - language_model.model.layers.31.post_attention_layernorm
diff --git a/configs/models/llava-7b/llava-7b-concepts-colors.yaml b/configs/models/llava-7b/llava-7b-concepts-colors.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a0a53c765342f3309529f65f7614891abcf4d86f
--- /dev/null
+++ b/configs/models/llava-7b/llava-7b-concepts-colors.yaml
@@ -0,0 +1,8 @@
+architecture: llava
+model_path: llava-hf/llava-1.5-7b-hf
+input_dir: ./data/images/colors/
+prompt: "Describe the color in this image in one word."
+output_db: output/llava-concepts-colors.db
+modules:
+ - vision_tower.vision_model.encoder.layers.23.layer_norm2
+ - language_model.model.layers.31.post_attention_layernorm
diff --git a/configs/models/llava-7b/llava-7b.yaml b/configs/models/llava-7b/llava-7b.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6f9114b01afada64c398f980599b205e4be01efa
--- /dev/null
+++ b/configs/models/llava-7b/llava-7b.yaml
@@ -0,0 +1,9 @@
+architecture: llava
+model_path: llava-hf/llava-1.5-7b-hf
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+output_db: output/llava.db
+modules:
+ - vision_tower.vision_model
+ - vision_tower.vision_model.encoder.layers.23.layer_norm2
+ - language_model.model.layers.31.post_attention_layernorm
diff --git a/configs/models/minicpm-V2/minicpm-V2.yaml b/configs/models/minicpm-V2/minicpm-V2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63dfcd8b090a668060c91d9f2b53dc5ac4630304
--- /dev/null
+++ b/configs/models/minicpm-V2/minicpm-V2.yaml
@@ -0,0 +1,13 @@
+architecture: minicpm
+model_path: compling/MiniCPM-V-2
+model:
+ - torch_dtype: auto
+ - trust_remote_code: True
+forward:
+ - max_new_tokens: 1
+output_db: output/minicpm.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - llm.lm_head
+ - vpm.encoder.layers.26
diff --git a/configs/models/minicpm-o/minicpm-o.yaml b/configs/models/minicpm-o/minicpm-o.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..186e7c31a1e62422b554b8a78f6ccb232cd8b7a3
--- /dev/null
+++ b/configs/models/minicpm-o/minicpm-o.yaml
@@ -0,0 +1,14 @@
+architecture: minicpm
+model_path: openbmb/MiniCPM-o-2_6
+model:
+ - torch_dtype: auto
+ - trust_remote_code: True
+ - attn_implementation: sdpa
+forward:
+ - max_new_tokens: 1
+output_db: output/minicpm-o.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - llm.lm_head
+ - vpm.encoder.layers.26
diff --git a/configs/models/molmo/molmo-7b.yaml b/configs/models/molmo/molmo-7b.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..274a4f477fe1fa8fb47727250c41837dfcdd9cfd
--- /dev/null
+++ b/configs/models/molmo/molmo-7b.yaml
@@ -0,0 +1,13 @@
+architecture: molmo
+model_path: allenai/Molmo-7B-D-0924
+model:
+ - torch_dtype: auto
+forward:
+ - max_new_tokens: 1
+ - stop_strings: <|endoftext|>
+output_db: output/molmo.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - model.transformer.blocks.0
+ - model.vision_backbone.image_vit.transformer.resblocks.22
diff --git a/configs/models/paligemma/paligemma-3b.yaml b/configs/models/paligemma/paligemma-3b.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..17ba29a309b1160860d7e812186f7f9a0f9cb227
--- /dev/null
+++ b/configs/models/paligemma/paligemma-3b.yaml
@@ -0,0 +1,11 @@
+architecture: paligemma
+model_path: google/paligemma-3b-mix-224
+model:
+ - torch_dtype: auto
+ - token:
+output_db: output/paligemma.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - language_model.lm_head
+ - multi_modal_projector
diff --git a/configs/models/pixtral/pixtral-12b.yaml b/configs/models/pixtral/pixtral-12b.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..3a726c16160105fb6ec87482094c7a93ea65eb26
--- /dev/null
+++ b/configs/models/pixtral/pixtral-12b.yaml
@@ -0,0 +1,9 @@
+architecture: pixtral
+model_path: mistralai/Pixtral-12B-2409
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+output_db: output/pixtral.db
+modules:
+ - layers.19.attention_norm
+ - layers.39.attention_norm
+download_path: "/project/aip-fredashi/fredashi/huggingface/pixtral-12b" # CHANGE THIS. This is the path to store the downloaded model. This dir will be created.
diff --git a/configs/models/plm/plm.yaml b/configs/models/plm/plm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6416e37cad5a51f7f71b5d1876799d92421cd762
--- /dev/null
+++ b/configs/models/plm/plm.yaml
@@ -0,0 +1,13 @@
+architecture: plm
+model_path: facebook/Perception-LM-1B
+model:
+ - torch_dtype: auto
+ - trust_remote_code: True
+forward:
+ - max_new_tokens: 1
+output_db: output/plm.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+modules:
+ - model.language_model.layers.7.post_attention_layernorm
+ - model.language_model.layers.15.post_attention_layernorm
diff --git a/configs/models/qwen/qwen-2b-clevr.yaml b/configs/models/qwen/qwen-2b-clevr.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eaba5d34c0aa7ead49cab1e8a1fb888195be7e00
--- /dev/null
+++ b/configs/models/qwen/qwen-2b-clevr.yaml
@@ -0,0 +1,10 @@
+architecture: qwen
+model_path: Qwen/Qwen2-VL-2B-Instruct
+dataset:
+ - dataset_path: compling/CLEVR_categories
+ - dataset_split: boolean
+output_db: output/qwen-boolean.db
+pooling_method: mean
+modules:
+ - model.layers.13.post_attention_layernorm
+ - model.layers.27.post_attention_layernorm
diff --git a/configs/models/qwen/qwen-2b.yaml b/configs/models/qwen/qwen-2b.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7d6053b0cfb38eecef8436e26bb2a2c5be4fa120
--- /dev/null
+++ b/configs/models/qwen/qwen-2b.yaml
@@ -0,0 +1,11 @@
+architecture: qwen
+model_path: Qwen/Qwen2-VL-2B-Instruct
+model:
+ - torch_dtype: auto
+output_db: output/qwen.db
+input_dir: ./data/test-images/
+prompt: "Describe the color in this image in one word."
+pooling_method: None
+modules:
+ - lm_head
+ - visual.blocks.31
diff --git a/configs/probe/llava/clevr-boolean-l16.yaml b/configs/probe/llava/clevr-boolean-l16.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..388eab21bdced16504803c16ffda843f719077a9
--- /dev/null
+++ b/configs/probe/llava/clevr-boolean-l16.yaml
@@ -0,0 +1,18 @@
+model:
+ - activation: ReLU
+ - hidden_size: 512
+ - num_layers: 2
+ - save_dir: output/llava_boolean_probe_l16
+training:
+ - batch_size: [64, 128, 1024]
+ - num_epochs: [50, 100, 200]
+ - learning_rate: [0.001, 0.0005, 0.0001]
+ - optimizer: AdamW
+ - loss: CrossEntropyLoss
+test:
+ - batch_size: 32
+ - loss: CrossEntropyLoss
+data:
+ - input_db: output/llava-boolean.db
+ - db_name: tensors
+ - input_layer: language_model.model.layers.16.post_attention_layernorm
diff --git a/configs/probe/qwen/clevr-boolean-l13-example.yaml b/configs/probe/qwen/clevr-boolean-l13-example.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2e79c4076facb64e4756c8f15d24f7afd65bda16
--- /dev/null
+++ b/configs/probe/qwen/clevr-boolean-l13-example.yaml
@@ -0,0 +1,18 @@
+model:
+ - activation: ReLU
+ - hidden_size: 512
+ - num_layers: 2
+ - save_dir: output/qwen_boolean_probe_l13
+training:
+ - batch_size: [64, 128]
+ - num_epochs: [50]
+ - learning_rate: [0.001]
+ - optimizer: AdamW
+ - loss: CrossEntropyLoss
+test:
+ - batch_size: 32
+ - loss: CrossEntropyLoss
+data:
+ - input_db: output/qwen-boolean.db
+ - db_name: tensors
+ - input_layer: model.layers.13.post_attention_layernorm
diff --git a/configs/probe/qwen/clevr-boolean-l13.yaml b/configs/probe/qwen/clevr-boolean-l13.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a39ee336cc3e8353a48982287ffc3d8fdca4b7fe
--- /dev/null
+++ b/configs/probe/qwen/clevr-boolean-l13.yaml
@@ -0,0 +1,18 @@
+model:
+ - activation: ReLU
+ - hidden_size: 512
+ - num_layers: 2
+ - save_dir: output/qwen_boolean_probe_l13
+training:
+ - batch_size: [64, 128, 1024]
+ - num_epochs: [50, 100, 200]
+ - learning_rate: [0.001, 0.0005, 0.0001]
+ - optimizer: AdamW
+ - loss: CrossEntropyLoss
+test:
+ - batch_size: 32
+ - loss: CrossEntropyLoss
+data:
+ - input_db: output/qwen-boolean.db
+ - db_name: tensors
+ - input_layer: model.layers.13.post_attention_layernorm
diff --git a/demo/.gradio/certificate.pem b/demo/.gradio/certificate.pem
new file mode 100644
index 0000000000000000000000000000000000000000..b85c8037f6b60976b2546fdbae88312c5246d9a3
--- /dev/null
+++ b/demo/.gradio/certificate.pem
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
diff --git a/demo/__init__.py b/demo/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9e8a1a0283e080b256fc4b62bde62e075391b75
--- /dev/null
+++ b/demo/__init__.py
@@ -0,0 +1,6 @@
+"""Demo package."""
+
+from . import _bootstrap # noqa: F401
+from .lookup import get_model_info # re-export for convenience
+
+__all__ = ['get_model_info']
diff --git a/demo/__pycache__/__init__.cpython-310.pyc b/demo/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fef6630f6dce89d6a82a4f2e2c587214d10e5996
Binary files /dev/null and b/demo/__pycache__/__init__.cpython-310.pyc differ
diff --git a/demo/__pycache__/_bootstrap.cpython-310.pyc b/demo/__pycache__/_bootstrap.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f697ae1b3813d23a1f1eb36febdc29269d1566a9
Binary files /dev/null and b/demo/__pycache__/_bootstrap.cpython-310.pyc differ
diff --git a/demo/__pycache__/launch_gradio.cpython-310.pyc b/demo/__pycache__/launch_gradio.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e72ae21325aea8f2ae66e44baba17bfe62e88411
Binary files /dev/null and b/demo/__pycache__/launch_gradio.cpython-310.pyc differ
diff --git a/demo/__pycache__/launch_interactive_gradio.cpython-310.pyc b/demo/__pycache__/launch_interactive_gradio.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..252f7c47af8a61e9a69066bd1b910951604b6545
Binary files /dev/null and b/demo/__pycache__/launch_interactive_gradio.cpython-310.pyc differ
diff --git a/demo/__pycache__/lookup.cpython-310.pyc b/demo/__pycache__/lookup.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2bafbecb31b7d4051797dfa511bd46c1f9bd9748
Binary files /dev/null and b/demo/__pycache__/lookup.cpython-310.pyc differ
diff --git a/demo/_bootstrap.py b/demo/_bootstrap.py
new file mode 100644
index 0000000000000000000000000000000000000000..2bdc4fcc92e8e3b148310834a879b17ca4ea042b
--- /dev/null
+++ b/demo/_bootstrap.py
@@ -0,0 +1,6 @@
+import sys
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+if str(REPO_ROOT) not in sys.path:
+ sys.path.insert(0, str(REPO_ROOT))
diff --git a/demo/launch_gradio.py b/demo/launch_gradio.py
new file mode 100644
index 0000000000000000000000000000000000000000..69fe7143372f36ec34096cc7eca4dc92f7592f24
--- /dev/null
+++ b/demo/launch_gradio.py
@@ -0,0 +1,645 @@
+"""Gradio demo for visualizing VLM first token probability distributions with two images."""
+
+from typing import Any, Dict, List, Optional, Tuple
+
+import gradio as gr
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+import torch.nn.functional as F
+from matplotlib.figure import Figure
+from matplotlib.text import Text
+from PIL import Image
+
+from demo.lookup import ModelVariants, get_model_info # noqa: E402
+from src.main import get_model # noqa: E402
+from src.models.base import ModelBase # noqa: E402
+from src.models.config import Config, ModelSelection # noqa: E402
+
+models_cache: Dict[str, Any] = {}
+current_model_selection: Optional[ModelSelection] = None
+
+
+def read_layer_spec(spec_file_path: str) -> List[str]:
+ """Read available layers from the model spec file.
+
+ Args:
+ spec_file_path: Path to the model specification file.
+
+ Returns:
+ List of available layer names, skipping blank lines.
+ """
+ try:
+ with open(spec_file_path, 'r', encoding='utf-8') as f:
+ lines = f.readlines()
+
+ # Filter out blank lines and strip whitespace
+ layers = [line.strip() for line in lines if line.strip()]
+ return layers
+
+ except FileNotFoundError:
+ print(f'Spec file not found: {spec_file_path}')
+ return ['Default layer (spec file not found)']
+ except Exception as e:
+ print(f'Error reading spec file: {str(e)}')
+ return ['Default layer (error reading spec)']
+
+
+def update_layer_choices(model_choice: str) -> Tuple[gr.Dropdown, gr.Button]:
+ """Update layer dropdown choices based on selected model.
+
+ Args:
+ model_choice: Selected model name.
+
+ Returns:
+ Updated dropdown component and button visibility.
+ """
+ if not model_choice:
+ return gr.Dropdown(choices=[], visible=False), gr.Button(visible=False)
+
+ try:
+ # Convert string choice to ModelVariants enum
+ model_var = ModelVariants(model_choice.lower())
+
+ # Get model info and read layer spec
+ _, _, model_spec_path = get_model_info(model_var)
+ layers = read_layer_spec(model_spec_path)
+
+ # Return updated dropdown with layer choices and make button visible
+ return (
+ gr.Dropdown(
+ choices=layers,
+ label=f'Select Module for {model_choice}',
+ value=layers[0] if layers else None,
+ visible=True,
+ interactive=True
+ ),
+ gr.Button('Analyze', variant='primary', visible=True)
+ )
+
+ except ValueError:
+ return (
+ gr.Dropdown(
+ choices=['Model not implemented'],
+ label='Select Module',
+ visible=True,
+ interactive=False
+ ),
+ gr.Button('Analyze', variant='primary', visible=False)
+ )
+ except Exception as e:
+ return (
+ gr.Dropdown(
+ choices=[f'Error: {str(e)}'],
+ label='Select Module',
+ visible=True,
+ interactive=False
+ ),
+ gr.Button('Analyze', variant='primary', visible=False)
+ )
+
+
+def load_model(model_var: ModelVariants, config: Config) -> ModelBase:
+ """Load the specified VLM and processor.
+
+ Args:
+ model_var: The model to load from ModelVariants enum.
+ config: The configuration object with model parameters.
+
+ Returns:
+ The loaded model instance.
+
+ Raises:
+ Exception: If model loading fails.
+ """
+ global models_cache, current_model_selection
+
+ model_key = model_var.value
+
+ # Check if model is already loaded
+ if model_key in models_cache:
+ current_model_selection = model_var
+ return models_cache[model_key]
+
+ print(f'Loading {model_var.value} model...')
+
+ try:
+ model_selection = config.architecture
+ model = get_model(config.architecture, config)
+
+ # Cache the loaded model and processor
+ models_cache[model_key] = model
+ current_model_selection = model_selection
+
+ print(f'{model_selection.value} model loaded successfully!')
+ return model
+
+ except Exception as e:
+ print(f'Error loading model {model_selection.value}: {str(e)}')
+ raise
+
+
+def get_single_image_probabilities(
+ instruction: str,
+ image: Image.Image,
+ vlm: ModelBase,
+ model_selection: ModelSelection,
+ top_k: int = 8
+) -> Tuple[List[str], np.ndarray]:
+ """Process a single image and return first token probabilities.
+
+ Args:
+ instruction: Text instruction for the model.
+ image: PIL Image to process.
+ vlm: Loaded model.
+ model_selection: The VLM being used.
+ top_k: Number of top tokens to return.
+
+ Returns:
+ Tuple containing list of top tokens and their probabilities.
+ """
+ # Generate prompt and process inputs
+ text = vlm._generate_prompt(instruction, has_images=True)
+ inputs = vlm._generate_processor_output(text, image)
+
+ with torch.no_grad():
+ outputs = vlm.model.generate(
+ **inputs,
+ max_new_tokens=1, # Only generate first token
+ output_scores=True,
+ return_dict_in_generate=True,
+ do_sample=False
+ )
+
+ # Get the logits for the first generated token
+ first_token_logits = outputs.scores[0][0] # Shape: [vocab_size]
+
+ # Convert logits to probabilities
+ probabilities = torch.softmax(first_token_logits, dim=-1)
+
+ # Get top-k probabilities for visualization
+ top_probs, top_indices = torch.topk(probabilities, top_k)
+
+ # Convert tokens back to text
+ top_tokens = [vlm.processor.tokenizer.decode([idx.item()]) for idx in top_indices]
+
+ return top_tokens, top_probs.cpu().numpy()
+
+
+def scale_figure_fonts(fig: Figure, factor: float = 1.5) -> None:
+ """Multiply all text sizes in a Matplotlib Figure by `factor`.
+
+ Args:
+ fig: The Matplotlib Figure to scale.
+ factor: The scaling factor (e.g., 1.5 to increase by 50%).
+ """
+ for ax in fig.get_axes():
+ # titles & axis labels
+ ax.title.set_fontsize(ax.title.get_fontsize() * factor)
+ ax.xaxis.label.set_size(ax.xaxis.label.get_size() * factor)
+ ax.yaxis.label.set_size(ax.yaxis.label.get_size() * factor)
+ # tick labels
+ for lbl in ax.get_xticklabels() + ax.get_yticklabels():
+ lbl.set_fontsize(lbl.get_fontsize() * factor)
+ # texts placed via ax.text(...) (e.g., numbers above bars / "No data" notes)
+ for t in ax.texts:
+ t.set_fontsize(t.get_fontsize() * factor)
+ # any stray Text artists attached to the figure (rare, but safe)
+ for t in fig.findobj(match=Text):
+ if t.figure is fig:
+ t.set_fontsize(t.get_fontsize() * factor)
+
+
+def create_dual_probability_plot(
+ tokens1: List[str], probabilities1: np.ndarray,
+ tokens2: List[str], probabilities2: np.ndarray
+) -> Figure:
+ """Create a matplotlib plot comparing token probabilities from two images.
+
+ Args:
+ tokens1: List of token strings from first image.
+ probabilities1: Array of probability values from first image.
+ tokens2: List of token strings from second image.
+ probabilities2: Array of probability values from second image.
+
+ Returns:
+ Matplotlib Figure object.
+ """
+ if len(tokens1) == 0 and len(tokens2) == 0:
+ fig, ax = plt.subplots(figsize=(15, 8))
+ ax.text(0.5, 0.5, 'No data to display',
+ horizontalalignment='center', verticalalignment='center')
+ ax.set_xlim(0, 1)
+ ax.set_ylim(0, 1)
+ return fig
+
+ # Unify y-range with padding (cap at 1.0)
+ max1 = float(np.max(probabilities1)) if len(tokens1) else 0.0
+ max2 = float(np.max(probabilities2)) if len(tokens2) else 0.0
+ y_upper = min(1.0, max(max1, max2) * 1.15 + 1e-6) # ~15% headroom
+
+ # Create subplots side by side with shared y
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 12), sharey=True)
+ ax1.set_ylim(0, y_upper)
+ ax2.set_ylim(0, y_upper)
+
+ # Plot first image results
+ if len(tokens1) > 0:
+ bars1 = ax1.bar(range(len(tokens1)), probabilities1, color='lightcoral',
+ edgecolor='darkred', alpha=0.7)
+ ax1.set_xlabel('Tokens', fontsize=12)
+ ax1.set_ylabel('Probability', fontsize=12)
+ ax1.set_title('Image 1 - First Token Probabilities',
+ fontsize=14, fontweight='bold')
+ ax1.set_xticks(range(len(tokens1)))
+ ax1.set_xticklabels(tokens1, rotation=45, ha='right')
+
+ # Clamp label position so it stays inside the axes
+ for bar, prob in zip(bars1, probabilities1):
+ h = bar.get_height()
+ y = min(h + 0.02 * y_upper, y_upper * 0.98)
+ ax1.text(bar.get_x() + bar.get_width()/2., y, f'{prob:.3f}',
+ ha='center', va='bottom', fontsize=9)
+
+ ax1.grid(axis='y', alpha=0.3)
+ else:
+ ax1.text(0.5, 0.5, 'No data for Image 1',
+ horizontalalignment='center', verticalalignment='center')
+ ax1.set_xlim(0, 1)
+ ax1.set_ylim(0, 1)
+
+ # Plot second image results
+ if len(tokens2) > 0:
+ bars2 = ax2.bar(range(len(tokens2)), probabilities2, color='skyblue',
+ edgecolor='navy', alpha=0.7)
+ ax2.set_xlabel('Tokens', fontsize=12)
+ ax2.set_ylabel('Probability', fontsize=12)
+ ax2.set_title('Image 2 - First Token Probabilities',
+ fontsize=14, fontweight='bold')
+ ax2.set_xticks(range(len(tokens2)))
+ ax2.set_xticklabels(tokens2, rotation=45, ha='right')
+
+ for bar, prob in zip(bars2, probabilities2):
+ h = bar.get_height()
+ y = min(h + 0.02 * y_upper, y_upper * 0.98)
+ ax2.text(bar.get_x() + bar.get_width()/2., y, f'{prob:.3f}',
+ ha='center', va='bottom', fontsize=9)
+
+ ax2.grid(axis='y', alpha=0.3)
+ else:
+ ax2.text(0.5, 0.5, 'No data for Image 2',
+ horizontalalignment='center', verticalalignment='center')
+ ax2.set_xlim(0, 1)
+ ax2.set_ylim(0, 1)
+
+ # Give extra space for rotated tick labels
+ fig.tight_layout()
+ fig.subplots_adjust(bottom=0.18)
+
+ return fig
+
+
+def get_module_similarity_pooled(
+ vlm: ModelBase,
+ module_name: str,
+ image1: Image.Image,
+ image2: Image.Image,
+ instruction: str,
+ pooling: str = 'mean'
+) -> float:
+ """Compute cosine similarity with optional pooling strategies.
+
+ Args:
+ vlm: The loaded VLM (ModelBase instance).
+ module_name: The layer/module name to extract features from.
+ image1: First PIL Image.
+ image2: Second PIL Image.
+ instruction: Text instruction for the model.
+ pooling: Pooling strategy - 'mean', 'max', 'cls', or 'none'.
+
+ Returns:
+ Cosine similarity value between the two embeddings.
+
+ Raises:
+ ValueError: If feature extraction fails or module not found.
+ """
+ embeddings = {}
+ target_module = None
+
+ def hook_fn(
+ module: torch.nn.Module,
+ input: Any,
+ output: Any
+ ) -> None:
+ """Forward hook to capture module output.
+
+ Args:
+ module: The module being hooked.
+ input: The input to the module.
+ output: The output from the module.
+ """
+ if isinstance(output, tuple):
+ embeddings['activation'] = output[0].detach()
+ else:
+ embeddings['activation'] = output.detach()
+
+ # Find and register hook
+ for name, module in vlm.model.named_modules():
+ if name == module_name:
+ target_module = module
+ hook_handle = module.register_forward_hook(hook_fn)
+ break
+
+ if target_module is None:
+ raise ValueError(f"Module '{module_name}' not found in model")
+
+ try:
+ # Extract embedding for image1
+ text = vlm._generate_prompt(instruction, has_images=True)
+ inputs1 = vlm._generate_processor_output(text, image1)
+
+ embeddings.clear()
+ with torch.no_grad():
+ _ = vlm.model(**inputs1)
+
+ if 'activation' not in embeddings:
+ raise ValueError('Failed to extract features for image1')
+
+ embedding1 = embeddings['activation']
+
+ # Extract embedding for image2
+ inputs2 = vlm._generate_processor_output(text, image2)
+
+ embeddings.clear()
+ with torch.no_grad():
+ _ = vlm.model(**inputs2)
+
+ if 'activation' not in embeddings:
+ raise ValueError('Failed to extract features for image2')
+
+ embedding2 = embeddings['activation']
+
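+        # NOTE (assumption): transformer-block activations captured by the hook
+        # are typically shaped [batch, seq_len, hidden]; each pooling branch
+        # below reduces them to [batch, hidden] before the cosine similarity.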
+ # Apply pooling strategy
+ if pooling == 'mean':
+ # Mean pooling across sequence dimension
+ if embedding1.dim() >= 2:
+ embedding1_pooled = embedding1.mean(dim=1)
+ embedding2_pooled = embedding2.mean(dim=1)
+ else:
+ embedding1_pooled = embedding1
+ embedding2_pooled = embedding2
+
+ elif pooling == 'max':
+ # Max pooling across sequence dimension
+ if embedding1.dim() >= 2:
+ embedding1_pooled = embedding1.max(dim=1)[0]
+ embedding2_pooled = embedding2.max(dim=1)[0]
+ else:
+ embedding1_pooled = embedding1
+ embedding2_pooled = embedding2
+
+ elif pooling == 'cls':
+ # Use first token (CLS token)
+ if embedding1.dim() >= 2:
+ embedding1_pooled = embedding1[:, 0, :]
+ embedding2_pooled = embedding2[:, 0, :]
+ else:
+ embedding1_pooled = embedding1
+ embedding2_pooled = embedding2
+
+ elif pooling == 'none':
+ # Flatten without pooling
+ embedding1_pooled = embedding1.reshape(embedding1.shape[0], -1)
+ embedding2_pooled = embedding2.reshape(embedding2.shape[0], -1)
+ else:
+ raise ValueError(f'Unknown pooling strategy: {pooling}')
+
+ # Ensure 2D shape [batch, features]
+        if embedding1_pooled.dim() == 1:
+            embedding1_pooled = embedding1_pooled.unsqueeze(0)
+        if embedding2_pooled.dim() == 1:
+            embedding2_pooled = embedding2_pooled.unsqueeze(0)
+
+ # Compute cosine similarity
+ similarity = F.cosine_similarity(embedding1_pooled, embedding2_pooled, dim=1)
+ similarity_value = float(similarity.mean().cpu().item())
+
+ return similarity_value
+
+ finally:
+ hook_handle.remove()
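+# Usage sketch (comment-only, not executed): assumes a loaded ModelBase `vlm` and
+# two PIL images; the module name below is taken from the aya-vision-8b spec under
+# logs/, and other models expose different module names.
+#
+#     sim = get_module_similarity_pooled(
+#         vlm,
+#         'language_model.model.layers.0.mlp',
+#         image1,
+#         image2,
+#         instruction='Describe the image in one word.',
+#         pooling='mean',
+#     )
+#     print(f'Cosine similarity: {sim:.3f}')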
+
+
+def process_dual_inputs(
+ model_choice: str,
+ selected_layer: str,
+ instruction: str,
+ image1: Optional[Image.Image],
+ image2: Optional[Image.Image],
+ top_k: int = 8
+) -> Tuple[Optional[Figure], str]:
+ """Main function to process dual inputs and return comparison plot.
+
+ Args:
+ model_choice: String name of the selected model.
+ selected_layer: String name of the selected layer.
+ instruction: Text instruction for the model.
+ image1: First PIL Image to process, can be None.
+ image2: Second PIL Image to process, can be None.
+ top_k: Number of top tokens to display.
+
+ Returns:
+ Tuple containing the plot figure and info text.
+ """
+ if image1 is None and image2 is None:
+ return None, 'Please upload at least one image.'
+
+ if not instruction.strip():
+ return None, 'Please provide an instruction.'
+
+ if not model_choice:
+ return None, 'Please select a model.'
+
+ if not selected_layer:
+ return None, 'Please select a layer.'
+
+ try:
+ # Initialize a config
+ model_var = ModelVariants(model_choice.lower())
+ model_selection, model_path, _ = get_model_info(model_var)
+ config = Config(model_selection, model_path, selected_layer, instruction)
+ config.model = {
+ 'torch_dtype': torch.float16,
+ 'low_cpu_mem_usage': True,
+ 'device_map': 'auto'
+ }
+
+ # Load the model
+ model = load_model(model_var, config)
+
+ # Handle cases where only one image is provided
+ if image1 is None:
+ image1 = image2
+ tokens1, probs1 = [], np.array([])
+ tokens2, probs2 = get_single_image_probabilities(
+ instruction, image2, model, model_selection, top_k
+ )
+ elif image2 is None:
+ image2 = image1
+ tokens1, probs1 = get_single_image_probabilities(
+ instruction, image1, model, model_selection, top_k
+ )
+ tokens2, probs2 = [], np.array([])
+ else:
+ tokens1, probs1 = get_single_image_probabilities(
+ instruction, image1, model, model_selection, top_k
+ )
+ tokens2, probs2 = get_single_image_probabilities(
+ instruction, image2, model, model_selection, top_k
+ )
+
+ if len(tokens1) == 0 and len(tokens2) == 0:
+ return None, 'Error: Could not process the inputs. Please check the model loading.'
+
+ # Create comparison plot
+ plot = create_dual_probability_plot(
+ tokens1, probs1, tokens2, probs2
+ )
+ scale_figure_fonts(plot, factor=1.25)
+
+ # Create info text
+ info_text = f'Model: {model_choice.upper()}\n'
+ info_text += f'Top-K: {top_k}\n'
+ info_text += f"Instruction: '{instruction}'\n\n"
+
+ if len(tokens1) > 0:
+ info_text += f"Image 1 - Top token: '{tokens1[0]}' (probability: {probs1[0]:.4f})\n"
+ else:
+ info_text += 'Image 1 - No data\n'
+
+ if len(tokens2) > 0:
+ info_text += f"Image 2 - Top token: '{tokens2[0]}' (probability: {probs2[0]:.4f})\n"
+ else:
+ info_text += 'Image 2 - No data\n'
+
+ if len(tokens1) > 0 and len(tokens2) > 0:
+ info_text += f'\nLayer: {selected_layer}\n'
+ similarity = get_module_similarity_pooled(model, selected_layer, image1, image2, instruction)
+ info_text += f'Cosine similarity between Image 1 and 2: {similarity:.3f}\n'
+
+ return plot, info_text
+
+ except ValueError as e:
+        return None, f'Invalid model or layer selection: {str(e)}'
+ except Exception as e:
+ return None, f'Error: {str(e)}'
+
+
+def create_demo() -> gr.Blocks:
+ """Create and configure the Gradio demo interface for dual image comparison.
+
+ Returns:
+ Configured Gradio Blocks interface.
+ """
+ with gr.Blocks(title='VLM-Lens Visualizer') as demo:
+ gr.Markdown("""
+ # VLM-Lens Demo
+
+ This VLM-Lens demo processes an instruction with up to two images through various Vision-Language Models (VLMs)
+ and visualizes the probability distribution of the first token in the response for each image.
+
+ **Instructions:**
+ 1. Select a VLM from the dropdown
+ 2. Select a layer from the available embedding layers
+ 3. Upload two images for comparison
+ 4. Enter your instruction/question about the images
+ 5. Adjust the number of top tokens to display (1-20)
+ 6. Click "Analyze" to see the first token probability distributions side by side
+
+ **Note:** You can upload just one image if you prefer single image analysis.
+ """)
+
+ with gr.Row():
+ with gr.Column():
+ model_dropdown = gr.Dropdown(
+ choices=[v.value.capitalize() for v in ModelVariants],
+ label='Select VLM',
+ value=None,
+ interactive=True
+ )
+
+ layer_dropdown = gr.Dropdown(
+ choices=[],
+ label='Select Module',
+ visible=False,
+ interactive=True
+ )
+
+ instruction_input = gr.Textbox(
+ label='Instruction',
+ placeholder='Describe what you see in this image...',
+ lines=3
+ )
+
+ top_k_slider = gr.Slider(
+ minimum=1,
+ maximum=20,
+ value=8,
+ step=1,
+ label='Number of Top Tokens to Display',
+ info='Select how many top probability tokens to show in the visualization'
+ )
+
+ with gr.Row():
+ image1_input = gr.Image(
+ label='Upload Image 1',
+ type='pil'
+ )
+ image2_input = gr.Image(
+ label='Upload Image 2',
+ type='pil'
+ )
+
+ analyze_btn = gr.Button('Analyze', variant='primary', visible=False)
+
+ with gr.Column():
+ plot_output = gr.Plot(label='First Token Probability Distribution Comparison')
+ info_output = gr.Textbox(
+ label='Analysis Info',
+ lines=8,
+ interactive=False
+ )
+
+ # Set up event handlers
+ model_dropdown.change(
+ fn=update_layer_choices,
+ inputs=[model_dropdown],
+ outputs=[layer_dropdown, analyze_btn]
+ )
+
+ analyze_btn.click(
+ fn=process_dual_inputs,
+ inputs=[model_dropdown, layer_dropdown, instruction_input, image1_input, image2_input, top_k_slider],
+ outputs=[plot_output, info_output]
+ )
+
+ # Add examples
+ gr.Examples(
+ examples=[
+ ['What is in this image? Describe in one word.', None, None],
+ ['Describe the main object in the picture in one word.', None, None],
+ ['What color is the dominant object? Describe in one word.', None, None],
+ ],
+ inputs=[instruction_input, image1_input, image2_input]
+ )
+
+ return demo
+
+
+if __name__ == '__main__':
+ # Create and launch the demo
+ demo = create_demo()
+ demo.launch(
+ share=True,
+ server_name='0.0.0.0',
+ server_port=7860
+ )
diff --git a/demo/lookup.py b/demo/lookup.py
new file mode 100644
index 0000000000000000000000000000000000000000..9182130cc278f927b3294ed8c03b21608fcd97c7
--- /dev/null
+++ b/demo/lookup.py
@@ -0,0 +1,171 @@
+"""Model info lookup utilities."""
+
+import os
+from enum import Enum
+from pathlib import Path
+from typing import Tuple
+
+from src.models.config import ModelSelection
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+SPECS_DIR = Path(os.getenv('MODEL_SPECS_DIR', REPO_ROOT / 'logs'))
+
+# TODO: To store local model weights in the repo, also define:
+# MODELS_DIR = Path(os.getenv('MODELS_DIR', REPO_ROOT / 'checkpoints'))
+
+
+class ModelVariants(str, Enum):
+ """Enum that contains all possible model variants."""
+ AYA_VISION_8B = 'aya-vision-8b'
+ BLIP2_3B = 'blip2-opt-2.7b'
+ COGVLM_17B = 'cogvlm-17b'
+ GLAMM_7B = 'glamm-7b'
+ INTERNLM_XC_25_7B = 'internlm-xcomposer2.5-7b'
+ INTERNVL_25_8B = 'internvl-2.5-8b'
+ JANUS_1B = 'janus-pro-1b'
+ LLAVA_15_7B = 'llava-1.5-7b'
+ MINICPM_O_26_8B = 'minicpm-o-2.6-8b'
+ MINICPM_V_20_3B = 'minicpm-v-2.0-2.8b'
+ MOLMO_7B = 'molmo-7b'
+ PALIGEMMA_3B = 'paligemma-3b'
+ PIXTRAL_12B = 'pixtral-12b'
+ PERCEPTION_LM_1B = 'perception-lm-1b'
+ QWENVL_20_2B = 'qwen2-vl-2b-instruct'
+ QWENVL_20_7B = 'qwen2-vl-7b-instruct'
+ # TODO: Add more models here as needed.
+
+
+# ---- Mapping ----
+# model_path: can be a local path or a HF repo id string
+# model_spec: absolute Path to the .txt file (we'll return a repo-root-relative string)
+_MODEL_MAPPING: dict[ModelVariants, dict[str, ModelSelection | str | Path]] = {
+ ModelVariants.AYA_VISION_8B: {
+ 'model_arch': ModelSelection.AYA_VISION,
+ 'model_path': 'CohereLabs/aya-vision-8b',
+ 'model_spec': SPECS_DIR / 'CohereLabs' / 'aya-vision-8b.txt',
+ },
+ ModelVariants.BLIP2_3B: {
+ 'model_arch': ModelSelection.BLIP2,
+ 'model_path': 'Salesforce/blip2-opt-2.7b',
+ 'model_spec': SPECS_DIR / 'Salesforce' / 'blip2-opt-2.7b.txt',
+ },
+ ModelVariants.COGVLM_17B: {
+ 'model_arch': ModelSelection.COGVLM,
+ 'model_path': 'THUDM/cogvlm-chat-hf',
+ 'model_spec': SPECS_DIR / 'THUDM' / 'cogvlm-chat-hf.txt',
+ },
+ ModelVariants.GLAMM_7B: {
+ 'model_arch': ModelSelection.GLAMM,
+ 'model_path': 'MBZUAI/GLaMM-FullScope',
+ 'model_spec': SPECS_DIR / 'MBZUAI' / 'GLaMM-FullScope.txt',
+ },
+ ModelVariants.INTERNLM_XC_25_7B: {
+ 'model_arch': ModelSelection.INTERNLM_XC,
+ 'model_path': 'internlm/internlm-xcomposer2d5-7b',
+ 'model_spec': SPECS_DIR / 'internlm' / 'internlm-xcomposer2d5-7b.txt',
+ },
+ ModelVariants.INTERNVL_25_8B: {
+ 'model_arch': ModelSelection.INTERNVL,
+ 'model_path': 'OpenGVLab/InternVL2_5-8B',
+ 'model_spec': SPECS_DIR / 'internvl' / 'InternVL2_5-8B.txt',
+ },
+ ModelVariants.JANUS_1B: {
+ 'model_arch': ModelSelection.JANUS,
+ 'model_path': 'deepseek-community/Janus-Pro-1B',
+ 'model_spec': SPECS_DIR / 'deepseek-community' / 'Janus-Pro-1B.txt',
+ },
+ ModelVariants.LLAVA_15_7B: {
+ 'model_arch': ModelSelection.LLAVA,
+ 'model_path': 'llava-hf/llava-1.5-7b-hf',
+ 'model_spec': SPECS_DIR / 'llava-hf' / 'llava-1.5-7b-hf.txt',
+ },
+ ModelVariants.MINICPM_O_26_8B: {
+ 'model_arch': ModelSelection.MINICPM,
+ 'model_path': 'openbmb/MiniCPM-o-2_6',
+ 'model_spec': SPECS_DIR / 'openbmb' / 'MiniCPM-o-2_6.txt',
+ },
+ ModelVariants.MINICPM_V_20_3B: {
+ 'model_arch': ModelSelection.MINICPM,
+ 'model_path': 'compling/MiniCPM-V-2',
+ 'model_spec': SPECS_DIR / 'wonderwind271' / 'MiniCPM-V-2.txt',
+ },
+ ModelVariants.MOLMO_7B: {
+ 'model_arch': ModelSelection.MOLMO,
+ 'model_path': 'allenai/Molmo-7B-D-0924',
+ 'model_spec': SPECS_DIR / 'allenai' / 'Molmo-7B-D-0924.txt',
+ },
+ ModelVariants.PALIGEMMA_3B: {
+ 'model_arch': ModelSelection.PALIGEMMA,
+ 'model_path': 'google/paligemma-3b-mix-224',
+ 'model_spec': SPECS_DIR / 'paligemma' / 'paligemma-3b.txt',
+ },
+ ModelVariants.PIXTRAL_12B: {
+ 'model_arch': ModelSelection.PIXTRAL,
+ 'model_path': 'mistralai/Pixtral-12B-2409',
+ 'model_spec': SPECS_DIR / 'mistralai' / 'Pixtral-12B-2409.txt',
+ },
+ ModelVariants.PERCEPTION_LM_1B: {
+ 'model_arch': ModelSelection.PLM,
+ 'model_path': 'facebook/Perception-LM-1B',
+ 'model_spec': SPECS_DIR / 'facebook' / 'Perception-LM-1B.txt',
+ },
+ ModelVariants.QWENVL_20_2B: {
+ 'model_arch': ModelSelection.QWEN,
+ 'model_path': 'Qwen/Qwen2-VL-2B-Instruct',
+ 'model_spec': SPECS_DIR / 'Qwen' / 'Qwen2-VL-2B-Instruct.txt',
+ },
+ ModelVariants.QWENVL_20_7B: {
+ 'model_arch': ModelSelection.QWEN,
+ 'model_path': 'Qwen/Qwen2-VL-7B-Instruct',
+ 'model_spec': SPECS_DIR / 'Qwen' / 'Qwen2-VL-7B-Instruct.txt',
+ },
+ # TODO: Add more models here as needed.
+}
+
+
+def _to_repo_relative(p: Path) -> str:
+ """Convert a path to a repo-root–relative string if possible.
+
+ Args:
+ p (Path): The path to convert.
+
+ Returns:
+ str: `p` relative to ``REPO_ROOT`` if `p` is within it; otherwise the
+ absolute path as a string.
+ """
+ try:
+ return str(p.resolve().relative_to(REPO_ROOT))
+ except ValueError:
+ return str(p)
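+
+# For example (illustrative, assuming a POSIX filesystem and the default REPO_ROOT):
+#     _to_repo_relative(REPO_ROOT / 'logs' / 'foo.txt')  # -> 'logs/foo.txt'
+#     _to_repo_relative(Path('/tmp/foo.txt'))            # -> '/tmp/foo.txt'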
+
+
+def get_model_info(model_var: ModelVariants) -> Tuple[ModelSelection, str, str]:
+ """Return the model path and spec link for a given selection.
+
+ Args:
+ model_var (ModelVariants): The model variant to look up.
+
+ Returns:
+ Tuple[ModelSelection, str, str]:
+ A triple of ``(model_selection, model_path, link_to_model_spec)`` where
+ `model_selection` is a ModelSelection enum entry,
+ `model_path` is an HF repo id or local path, and
+ `link_to_model_spec` is a repo-root-relative path to the spec ``.txt``.
+
+ Raises:
+        KeyError: If the provided `model_var` is unknown / not in the mapping.
+ FileNotFoundError: If the resolved spec file does not exist.
+ """
+ try:
+ info = _MODEL_MAPPING[model_var]
+ except KeyError as e:
+ raise KeyError(f'Unknown model: {model_var!r}') from e
+
+ model_selection = ModelSelection(info['model_arch'])
+ model_path = str(info['model_path'])
+ spec_path = Path(info['model_spec']).resolve()
+
+ if not spec_path.exists():
+ raise FileNotFoundError(f'Spec file not found: {spec_path}')
+
+ return model_selection, model_path, _to_repo_relative(spec_path)
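+
+
+# Usage sketch (comment-only): with the default MODEL_SPECS_DIR and the spec file
+# present under logs/, the llava entry resolves as follows.
+#
+#     selection, path, spec = get_model_info(ModelVariants.LLAVA_15_7B)
+#     # selection -> ModelSelection.LLAVA
+#     # path      -> 'llava-hf/llava-1.5-7b-hf'
+#     # spec      -> 'logs/llava-hf/llava-1.5-7b-hf.txt'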
diff --git a/demo/requirements.txt b/demo/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..60590d415cac6728360ec7989783e29007f0d86a
--- /dev/null
+++ b/demo/requirements.txt
@@ -0,0 +1,2 @@
+gradio==5.47.2
+accelerate==1.10.1
diff --git a/logs/CohereLabs/aya-vision-8b.txt b/logs/CohereLabs/aya-vision-8b.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a98c0075966e06e832b05581764316b3f42f06ed
--- /dev/null
+++ b/logs/CohereLabs/aya-vision-8b.txt
@@ -0,0 +1,729 @@
+
+vision_tower
+vision_tower.vision_model
+vision_tower.vision_model.embeddings
+vision_tower.vision_model.embeddings.patch_embedding
+vision_tower.vision_model.embeddings.position_embedding
+vision_tower.vision_model.encoder
+vision_tower.vision_model.encoder.layers
+vision_tower.vision_model.encoder.layers.0
+vision_tower.vision_model.encoder.layers.0.self_attn
+vision_tower.vision_model.encoder.layers.0.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.0.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.0.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.0.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.0.layer_norm1
+vision_tower.vision_model.encoder.layers.0.mlp
+vision_tower.vision_model.encoder.layers.0.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.0.mlp.fc1
+vision_tower.vision_model.encoder.layers.0.mlp.fc2
+vision_tower.vision_model.encoder.layers.0.layer_norm2
+vision_tower.vision_model.encoder.layers.1
+vision_tower.vision_model.encoder.layers.1.self_attn
+vision_tower.vision_model.encoder.layers.1.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.1.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.1.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.1.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.1.layer_norm1
+vision_tower.vision_model.encoder.layers.1.mlp
+vision_tower.vision_model.encoder.layers.1.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.1.mlp.fc1
+vision_tower.vision_model.encoder.layers.1.mlp.fc2
+vision_tower.vision_model.encoder.layers.1.layer_norm2
+vision_tower.vision_model.encoder.layers.2
+vision_tower.vision_model.encoder.layers.2.self_attn
+vision_tower.vision_model.encoder.layers.2.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.2.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.2.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.2.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.2.layer_norm1
+vision_tower.vision_model.encoder.layers.2.mlp
+vision_tower.vision_model.encoder.layers.2.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.2.mlp.fc1
+vision_tower.vision_model.encoder.layers.2.mlp.fc2
+vision_tower.vision_model.encoder.layers.2.layer_norm2
+vision_tower.vision_model.encoder.layers.3
+vision_tower.vision_model.encoder.layers.3.self_attn
+vision_tower.vision_model.encoder.layers.3.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.3.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.3.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.3.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.3.layer_norm1
+vision_tower.vision_model.encoder.layers.3.mlp
+vision_tower.vision_model.encoder.layers.3.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.3.mlp.fc1
+vision_tower.vision_model.encoder.layers.3.mlp.fc2
+vision_tower.vision_model.encoder.layers.3.layer_norm2
+vision_tower.vision_model.encoder.layers.4
+vision_tower.vision_model.encoder.layers.4.self_attn
+vision_tower.vision_model.encoder.layers.4.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.4.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.4.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.4.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.4.layer_norm1
+vision_tower.vision_model.encoder.layers.4.mlp
+vision_tower.vision_model.encoder.layers.4.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.4.mlp.fc1
+vision_tower.vision_model.encoder.layers.4.mlp.fc2
+vision_tower.vision_model.encoder.layers.4.layer_norm2
+vision_tower.vision_model.encoder.layers.5
+vision_tower.vision_model.encoder.layers.5.self_attn
+vision_tower.vision_model.encoder.layers.5.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.5.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.5.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.5.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.5.layer_norm1
+vision_tower.vision_model.encoder.layers.5.mlp
+vision_tower.vision_model.encoder.layers.5.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.5.mlp.fc1
+vision_tower.vision_model.encoder.layers.5.mlp.fc2
+vision_tower.vision_model.encoder.layers.5.layer_norm2
+vision_tower.vision_model.encoder.layers.6
+vision_tower.vision_model.encoder.layers.6.self_attn
+vision_tower.vision_model.encoder.layers.6.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.6.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.6.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.6.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.6.layer_norm1
+vision_tower.vision_model.encoder.layers.6.mlp
+vision_tower.vision_model.encoder.layers.6.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.6.mlp.fc1
+vision_tower.vision_model.encoder.layers.6.mlp.fc2
+vision_tower.vision_model.encoder.layers.6.layer_norm2
+vision_tower.vision_model.encoder.layers.7
+vision_tower.vision_model.encoder.layers.7.self_attn
+vision_tower.vision_model.encoder.layers.7.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.7.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.7.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.7.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.7.layer_norm1
+vision_tower.vision_model.encoder.layers.7.mlp
+vision_tower.vision_model.encoder.layers.7.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.7.mlp.fc1
+vision_tower.vision_model.encoder.layers.7.mlp.fc2
+vision_tower.vision_model.encoder.layers.7.layer_norm2
+vision_tower.vision_model.encoder.layers.8
+vision_tower.vision_model.encoder.layers.8.self_attn
+vision_tower.vision_model.encoder.layers.8.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.8.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.8.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.8.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.8.layer_norm1
+vision_tower.vision_model.encoder.layers.8.mlp
+vision_tower.vision_model.encoder.layers.8.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.8.mlp.fc1
+vision_tower.vision_model.encoder.layers.8.mlp.fc2
+vision_tower.vision_model.encoder.layers.8.layer_norm2
+vision_tower.vision_model.encoder.layers.9
+vision_tower.vision_model.encoder.layers.9.self_attn
+vision_tower.vision_model.encoder.layers.9.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.9.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.9.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.9.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.9.layer_norm1
+vision_tower.vision_model.encoder.layers.9.mlp
+vision_tower.vision_model.encoder.layers.9.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.9.mlp.fc1
+vision_tower.vision_model.encoder.layers.9.mlp.fc2
+vision_tower.vision_model.encoder.layers.9.layer_norm2
+vision_tower.vision_model.encoder.layers.10
+vision_tower.vision_model.encoder.layers.10.self_attn
+vision_tower.vision_model.encoder.layers.10.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.10.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.10.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.10.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.10.layer_norm1
+vision_tower.vision_model.encoder.layers.10.mlp
+vision_tower.vision_model.encoder.layers.10.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.10.mlp.fc1
+vision_tower.vision_model.encoder.layers.10.mlp.fc2
+vision_tower.vision_model.encoder.layers.10.layer_norm2
+vision_tower.vision_model.encoder.layers.11
+vision_tower.vision_model.encoder.layers.11.self_attn
+vision_tower.vision_model.encoder.layers.11.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.11.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.11.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.11.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.11.layer_norm1
+vision_tower.vision_model.encoder.layers.11.mlp
+vision_tower.vision_model.encoder.layers.11.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.11.mlp.fc1
+vision_tower.vision_model.encoder.layers.11.mlp.fc2
+vision_tower.vision_model.encoder.layers.11.layer_norm2
+vision_tower.vision_model.encoder.layers.12
+vision_tower.vision_model.encoder.layers.12.self_attn
+vision_tower.vision_model.encoder.layers.12.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.12.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.12.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.12.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.12.layer_norm1
+vision_tower.vision_model.encoder.layers.12.mlp
+vision_tower.vision_model.encoder.layers.12.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.12.mlp.fc1
+vision_tower.vision_model.encoder.layers.12.mlp.fc2
+vision_tower.vision_model.encoder.layers.12.layer_norm2
+vision_tower.vision_model.encoder.layers.13
+vision_tower.vision_model.encoder.layers.13.self_attn
+vision_tower.vision_model.encoder.layers.13.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.13.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.13.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.13.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.13.layer_norm1
+vision_tower.vision_model.encoder.layers.13.mlp
+vision_tower.vision_model.encoder.layers.13.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.13.mlp.fc1
+vision_tower.vision_model.encoder.layers.13.mlp.fc2
+vision_tower.vision_model.encoder.layers.13.layer_norm2
+vision_tower.vision_model.encoder.layers.14
+vision_tower.vision_model.encoder.layers.14.self_attn
+vision_tower.vision_model.encoder.layers.14.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.14.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.14.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.14.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.14.layer_norm1
+vision_tower.vision_model.encoder.layers.14.mlp
+vision_tower.vision_model.encoder.layers.14.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.14.mlp.fc1
+vision_tower.vision_model.encoder.layers.14.mlp.fc2
+vision_tower.vision_model.encoder.layers.14.layer_norm2
+vision_tower.vision_model.encoder.layers.15
+vision_tower.vision_model.encoder.layers.15.self_attn
+vision_tower.vision_model.encoder.layers.15.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.15.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.15.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.15.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.15.layer_norm1
+vision_tower.vision_model.encoder.layers.15.mlp
+vision_tower.vision_model.encoder.layers.15.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.15.mlp.fc1
+vision_tower.vision_model.encoder.layers.15.mlp.fc2
+vision_tower.vision_model.encoder.layers.15.layer_norm2
+vision_tower.vision_model.encoder.layers.16
+vision_tower.vision_model.encoder.layers.16.self_attn
+vision_tower.vision_model.encoder.layers.16.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.16.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.16.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.16.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.16.layer_norm1
+vision_tower.vision_model.encoder.layers.16.mlp
+vision_tower.vision_model.encoder.layers.16.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.16.mlp.fc1
+vision_tower.vision_model.encoder.layers.16.mlp.fc2
+vision_tower.vision_model.encoder.layers.16.layer_norm2
+vision_tower.vision_model.encoder.layers.17
+vision_tower.vision_model.encoder.layers.17.self_attn
+vision_tower.vision_model.encoder.layers.17.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.17.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.17.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.17.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.17.layer_norm1
+vision_tower.vision_model.encoder.layers.17.mlp
+vision_tower.vision_model.encoder.layers.17.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.17.mlp.fc1
+vision_tower.vision_model.encoder.layers.17.mlp.fc2
+vision_tower.vision_model.encoder.layers.17.layer_norm2
+vision_tower.vision_model.encoder.layers.18
+vision_tower.vision_model.encoder.layers.18.self_attn
+vision_tower.vision_model.encoder.layers.18.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.18.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.18.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.18.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.18.layer_norm1
+vision_tower.vision_model.encoder.layers.18.mlp
+vision_tower.vision_model.encoder.layers.18.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.18.mlp.fc1
+vision_tower.vision_model.encoder.layers.18.mlp.fc2
+vision_tower.vision_model.encoder.layers.18.layer_norm2
+vision_tower.vision_model.encoder.layers.19
+vision_tower.vision_model.encoder.layers.19.self_attn
+vision_tower.vision_model.encoder.layers.19.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.19.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.19.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.19.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.19.layer_norm1
+vision_tower.vision_model.encoder.layers.19.mlp
+vision_tower.vision_model.encoder.layers.19.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.19.mlp.fc1
+vision_tower.vision_model.encoder.layers.19.mlp.fc2
+vision_tower.vision_model.encoder.layers.19.layer_norm2
+vision_tower.vision_model.encoder.layers.20
+vision_tower.vision_model.encoder.layers.20.self_attn
+vision_tower.vision_model.encoder.layers.20.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.20.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.20.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.20.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.20.layer_norm1
+vision_tower.vision_model.encoder.layers.20.mlp
+vision_tower.vision_model.encoder.layers.20.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.20.mlp.fc1
+vision_tower.vision_model.encoder.layers.20.mlp.fc2
+vision_tower.vision_model.encoder.layers.20.layer_norm2
+vision_tower.vision_model.encoder.layers.21
+vision_tower.vision_model.encoder.layers.21.self_attn
+vision_tower.vision_model.encoder.layers.21.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.21.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.21.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.21.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.21.layer_norm1
+vision_tower.vision_model.encoder.layers.21.mlp
+vision_tower.vision_model.encoder.layers.21.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.21.mlp.fc1
+vision_tower.vision_model.encoder.layers.21.mlp.fc2
+vision_tower.vision_model.encoder.layers.21.layer_norm2
+vision_tower.vision_model.encoder.layers.22
+vision_tower.vision_model.encoder.layers.22.self_attn
+vision_tower.vision_model.encoder.layers.22.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.22.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.22.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.22.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.22.layer_norm1
+vision_tower.vision_model.encoder.layers.22.mlp
+vision_tower.vision_model.encoder.layers.22.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.22.mlp.fc1
+vision_tower.vision_model.encoder.layers.22.mlp.fc2
+vision_tower.vision_model.encoder.layers.22.layer_norm2
+vision_tower.vision_model.encoder.layers.23
+vision_tower.vision_model.encoder.layers.23.self_attn
+vision_tower.vision_model.encoder.layers.23.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.23.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.23.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.23.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.23.layer_norm1
+vision_tower.vision_model.encoder.layers.23.mlp
+vision_tower.vision_model.encoder.layers.23.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.23.mlp.fc1
+vision_tower.vision_model.encoder.layers.23.mlp.fc2
+vision_tower.vision_model.encoder.layers.23.layer_norm2
+vision_tower.vision_model.encoder.layers.24
+vision_tower.vision_model.encoder.layers.24.self_attn
+vision_tower.vision_model.encoder.layers.24.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.24.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.24.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.24.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.24.layer_norm1
+vision_tower.vision_model.encoder.layers.24.mlp
+vision_tower.vision_model.encoder.layers.24.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.24.mlp.fc1
+vision_tower.vision_model.encoder.layers.24.mlp.fc2
+vision_tower.vision_model.encoder.layers.24.layer_norm2
+vision_tower.vision_model.encoder.layers.25
+vision_tower.vision_model.encoder.layers.25.self_attn
+vision_tower.vision_model.encoder.layers.25.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.25.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.25.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.25.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.25.layer_norm1
+vision_tower.vision_model.encoder.layers.25.mlp
+vision_tower.vision_model.encoder.layers.25.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.25.mlp.fc1
+vision_tower.vision_model.encoder.layers.25.mlp.fc2
+vision_tower.vision_model.encoder.layers.25.layer_norm2
+vision_tower.vision_model.encoder.layers.26
+vision_tower.vision_model.encoder.layers.26.self_attn
+vision_tower.vision_model.encoder.layers.26.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.26.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.26.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.26.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.26.layer_norm1
+vision_tower.vision_model.encoder.layers.26.mlp
+vision_tower.vision_model.encoder.layers.26.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.26.mlp.fc1
+vision_tower.vision_model.encoder.layers.26.mlp.fc2
+vision_tower.vision_model.encoder.layers.26.layer_norm2
+vision_tower.vision_model.post_layernorm
+multi_modal_projector
+multi_modal_projector.layernorm
+multi_modal_projector.linear_1
+multi_modal_projector.act
+multi_modal_projector.linear_2
+language_model
+language_model.model
+language_model.model.embed_tokens
+language_model.model.layers
+language_model.model.layers.0
+language_model.model.layers.0.self_attn
+language_model.model.layers.0.self_attn.q_proj
+language_model.model.layers.0.self_attn.k_proj
+language_model.model.layers.0.self_attn.v_proj
+language_model.model.layers.0.self_attn.o_proj
+language_model.model.layers.0.mlp
+language_model.model.layers.0.mlp.gate_proj
+language_model.model.layers.0.mlp.up_proj
+language_model.model.layers.0.mlp.down_proj
+language_model.model.layers.0.mlp.act_fn
+language_model.model.layers.0.input_layernorm
+language_model.model.layers.1
+language_model.model.layers.1.self_attn
+language_model.model.layers.1.self_attn.q_proj
+language_model.model.layers.1.self_attn.k_proj
+language_model.model.layers.1.self_attn.v_proj
+language_model.model.layers.1.self_attn.o_proj
+language_model.model.layers.1.mlp
+language_model.model.layers.1.mlp.gate_proj
+language_model.model.layers.1.mlp.up_proj
+language_model.model.layers.1.mlp.down_proj
+language_model.model.layers.1.mlp.act_fn
+language_model.model.layers.1.input_layernorm
+language_model.model.layers.2
+language_model.model.layers.2.self_attn
+language_model.model.layers.2.self_attn.q_proj
+language_model.model.layers.2.self_attn.k_proj
+language_model.model.layers.2.self_attn.v_proj
+language_model.model.layers.2.self_attn.o_proj
+language_model.model.layers.2.mlp
+language_model.model.layers.2.mlp.gate_proj
+language_model.model.layers.2.mlp.up_proj
+language_model.model.layers.2.mlp.down_proj
+language_model.model.layers.2.mlp.act_fn
+language_model.model.layers.2.input_layernorm
+language_model.model.layers.3
+language_model.model.layers.3.self_attn
+language_model.model.layers.3.self_attn.q_proj
+language_model.model.layers.3.self_attn.k_proj
+language_model.model.layers.3.self_attn.v_proj
+language_model.model.layers.3.self_attn.o_proj
+language_model.model.layers.3.mlp
+language_model.model.layers.3.mlp.gate_proj
+language_model.model.layers.3.mlp.up_proj
+language_model.model.layers.3.mlp.down_proj
+language_model.model.layers.3.mlp.act_fn
+language_model.model.layers.3.input_layernorm
+language_model.model.layers.4
+language_model.model.layers.4.self_attn
+language_model.model.layers.4.self_attn.q_proj
+language_model.model.layers.4.self_attn.k_proj
+language_model.model.layers.4.self_attn.v_proj
+language_model.model.layers.4.self_attn.o_proj
+language_model.model.layers.4.mlp
+language_model.model.layers.4.mlp.gate_proj
+language_model.model.layers.4.mlp.up_proj
+language_model.model.layers.4.mlp.down_proj
+language_model.model.layers.4.mlp.act_fn
+language_model.model.layers.4.input_layernorm
+language_model.model.layers.5
+language_model.model.layers.5.self_attn
+language_model.model.layers.5.self_attn.q_proj
+language_model.model.layers.5.self_attn.k_proj
+language_model.model.layers.5.self_attn.v_proj
+language_model.model.layers.5.self_attn.o_proj
+language_model.model.layers.5.mlp
+language_model.model.layers.5.mlp.gate_proj
+language_model.model.layers.5.mlp.up_proj
+language_model.model.layers.5.mlp.down_proj
+language_model.model.layers.5.mlp.act_fn
+language_model.model.layers.5.input_layernorm
+language_model.model.layers.6
+language_model.model.layers.6.self_attn
+language_model.model.layers.6.self_attn.q_proj
+language_model.model.layers.6.self_attn.k_proj
+language_model.model.layers.6.self_attn.v_proj
+language_model.model.layers.6.self_attn.o_proj
+language_model.model.layers.6.mlp
+language_model.model.layers.6.mlp.gate_proj
+language_model.model.layers.6.mlp.up_proj
+language_model.model.layers.6.mlp.down_proj
+language_model.model.layers.6.mlp.act_fn
+language_model.model.layers.6.input_layernorm
+language_model.model.layers.7
+language_model.model.layers.7.self_attn
+language_model.model.layers.7.self_attn.q_proj
+language_model.model.layers.7.self_attn.k_proj
+language_model.model.layers.7.self_attn.v_proj
+language_model.model.layers.7.self_attn.o_proj
+language_model.model.layers.7.mlp
+language_model.model.layers.7.mlp.gate_proj
+language_model.model.layers.7.mlp.up_proj
+language_model.model.layers.7.mlp.down_proj
+language_model.model.layers.7.mlp.act_fn
+language_model.model.layers.7.input_layernorm
+language_model.model.layers.8
+language_model.model.layers.8.self_attn
+language_model.model.layers.8.self_attn.q_proj
+language_model.model.layers.8.self_attn.k_proj
+language_model.model.layers.8.self_attn.v_proj
+language_model.model.layers.8.self_attn.o_proj
+language_model.model.layers.8.mlp
+language_model.model.layers.8.mlp.gate_proj
+language_model.model.layers.8.mlp.up_proj
+language_model.model.layers.8.mlp.down_proj
+language_model.model.layers.8.mlp.act_fn
+language_model.model.layers.8.input_layernorm
+language_model.model.layers.9
+language_model.model.layers.9.self_attn
+language_model.model.layers.9.self_attn.q_proj
+language_model.model.layers.9.self_attn.k_proj
+language_model.model.layers.9.self_attn.v_proj
+language_model.model.layers.9.self_attn.o_proj
+language_model.model.layers.9.mlp
+language_model.model.layers.9.mlp.gate_proj
+language_model.model.layers.9.mlp.up_proj
+language_model.model.layers.9.mlp.down_proj
+language_model.model.layers.9.mlp.act_fn
+language_model.model.layers.9.input_layernorm
+language_model.model.layers.10
+language_model.model.layers.10.self_attn
+language_model.model.layers.10.self_attn.q_proj
+language_model.model.layers.10.self_attn.k_proj
+language_model.model.layers.10.self_attn.v_proj
+language_model.model.layers.10.self_attn.o_proj
+language_model.model.layers.10.mlp
+language_model.model.layers.10.mlp.gate_proj
+language_model.model.layers.10.mlp.up_proj
+language_model.model.layers.10.mlp.down_proj
+language_model.model.layers.10.mlp.act_fn
+language_model.model.layers.10.input_layernorm
+language_model.model.layers.11
+language_model.model.layers.11.self_attn
+language_model.model.layers.11.self_attn.q_proj
+language_model.model.layers.11.self_attn.k_proj
+language_model.model.layers.11.self_attn.v_proj
+language_model.model.layers.11.self_attn.o_proj
+language_model.model.layers.11.mlp
+language_model.model.layers.11.mlp.gate_proj
+language_model.model.layers.11.mlp.up_proj
+language_model.model.layers.11.mlp.down_proj
+language_model.model.layers.11.mlp.act_fn
+language_model.model.layers.11.input_layernorm
+language_model.model.layers.12
+language_model.model.layers.12.self_attn
+language_model.model.layers.12.self_attn.q_proj
+language_model.model.layers.12.self_attn.k_proj
+language_model.model.layers.12.self_attn.v_proj
+language_model.model.layers.12.self_attn.o_proj
+language_model.model.layers.12.mlp
+language_model.model.layers.12.mlp.gate_proj
+language_model.model.layers.12.mlp.up_proj
+language_model.model.layers.12.mlp.down_proj
+language_model.model.layers.12.mlp.act_fn
+language_model.model.layers.12.input_layernorm
+language_model.model.layers.13
+language_model.model.layers.13.self_attn
+language_model.model.layers.13.self_attn.q_proj
+language_model.model.layers.13.self_attn.k_proj
+language_model.model.layers.13.self_attn.v_proj
+language_model.model.layers.13.self_attn.o_proj
+language_model.model.layers.13.mlp
+language_model.model.layers.13.mlp.gate_proj
+language_model.model.layers.13.mlp.up_proj
+language_model.model.layers.13.mlp.down_proj
+language_model.model.layers.13.mlp.act_fn
+language_model.model.layers.13.input_layernorm
+language_model.model.layers.14
+language_model.model.layers.14.self_attn
+language_model.model.layers.14.self_attn.q_proj
+language_model.model.layers.14.self_attn.k_proj
+language_model.model.layers.14.self_attn.v_proj
+language_model.model.layers.14.self_attn.o_proj
+language_model.model.layers.14.mlp
+language_model.model.layers.14.mlp.gate_proj
+language_model.model.layers.14.mlp.up_proj
+language_model.model.layers.14.mlp.down_proj
+language_model.model.layers.14.mlp.act_fn
+language_model.model.layers.14.input_layernorm
+language_model.model.layers.15
+language_model.model.layers.15.self_attn
+language_model.model.layers.15.self_attn.q_proj
+language_model.model.layers.15.self_attn.k_proj
+language_model.model.layers.15.self_attn.v_proj
+language_model.model.layers.15.self_attn.o_proj
+language_model.model.layers.15.mlp
+language_model.model.layers.15.mlp.gate_proj
+language_model.model.layers.15.mlp.up_proj
+language_model.model.layers.15.mlp.down_proj
+language_model.model.layers.15.mlp.act_fn
+language_model.model.layers.15.input_layernorm
+language_model.model.layers.16
+language_model.model.layers.16.self_attn
+language_model.model.layers.16.self_attn.q_proj
+language_model.model.layers.16.self_attn.k_proj
+language_model.model.layers.16.self_attn.v_proj
+language_model.model.layers.16.self_attn.o_proj
+language_model.model.layers.16.mlp
+language_model.model.layers.16.mlp.gate_proj
+language_model.model.layers.16.mlp.up_proj
+language_model.model.layers.16.mlp.down_proj
+language_model.model.layers.16.mlp.act_fn
+language_model.model.layers.16.input_layernorm
+language_model.model.layers.17
+language_model.model.layers.17.self_attn
+language_model.model.layers.17.self_attn.q_proj
+language_model.model.layers.17.self_attn.k_proj
+language_model.model.layers.17.self_attn.v_proj
+language_model.model.layers.17.self_attn.o_proj
+language_model.model.layers.17.mlp
+language_model.model.layers.17.mlp.gate_proj
+language_model.model.layers.17.mlp.up_proj
+language_model.model.layers.17.mlp.down_proj
+language_model.model.layers.17.mlp.act_fn
+language_model.model.layers.17.input_layernorm
+language_model.model.layers.18
+language_model.model.layers.18.self_attn
+language_model.model.layers.18.self_attn.q_proj
+language_model.model.layers.18.self_attn.k_proj
+language_model.model.layers.18.self_attn.v_proj
+language_model.model.layers.18.self_attn.o_proj
+language_model.model.layers.18.mlp
+language_model.model.layers.18.mlp.gate_proj
+language_model.model.layers.18.mlp.up_proj
+language_model.model.layers.18.mlp.down_proj
+language_model.model.layers.18.mlp.act_fn
+language_model.model.layers.18.input_layernorm
+language_model.model.layers.19
+language_model.model.layers.19.self_attn
+language_model.model.layers.19.self_attn.q_proj
+language_model.model.layers.19.self_attn.k_proj
+language_model.model.layers.19.self_attn.v_proj
+language_model.model.layers.19.self_attn.o_proj
+language_model.model.layers.19.mlp
+language_model.model.layers.19.mlp.gate_proj
+language_model.model.layers.19.mlp.up_proj
+language_model.model.layers.19.mlp.down_proj
+language_model.model.layers.19.mlp.act_fn
+language_model.model.layers.19.input_layernorm
+language_model.model.layers.20
+language_model.model.layers.20.self_attn
+language_model.model.layers.20.self_attn.q_proj
+language_model.model.layers.20.self_attn.k_proj
+language_model.model.layers.20.self_attn.v_proj
+language_model.model.layers.20.self_attn.o_proj
+language_model.model.layers.20.mlp
+language_model.model.layers.20.mlp.gate_proj
+language_model.model.layers.20.mlp.up_proj
+language_model.model.layers.20.mlp.down_proj
+language_model.model.layers.20.mlp.act_fn
+language_model.model.layers.20.input_layernorm
+language_model.model.layers.21
+language_model.model.layers.21.self_attn
+language_model.model.layers.21.self_attn.q_proj
+language_model.model.layers.21.self_attn.k_proj
+language_model.model.layers.21.self_attn.v_proj
+language_model.model.layers.21.self_attn.o_proj
+language_model.model.layers.21.mlp
+language_model.model.layers.21.mlp.gate_proj
+language_model.model.layers.21.mlp.up_proj
+language_model.model.layers.21.mlp.down_proj
+language_model.model.layers.21.mlp.act_fn
+language_model.model.layers.21.input_layernorm
+language_model.model.layers.22
+language_model.model.layers.22.self_attn
+language_model.model.layers.22.self_attn.q_proj
+language_model.model.layers.22.self_attn.k_proj
+language_model.model.layers.22.self_attn.v_proj
+language_model.model.layers.22.self_attn.o_proj
+language_model.model.layers.22.mlp
+language_model.model.layers.22.mlp.gate_proj
+language_model.model.layers.22.mlp.up_proj
+language_model.model.layers.22.mlp.down_proj
+language_model.model.layers.22.mlp.act_fn
+language_model.model.layers.22.input_layernorm
+language_model.model.layers.23
+language_model.model.layers.23.self_attn
+language_model.model.layers.23.self_attn.q_proj
+language_model.model.layers.23.self_attn.k_proj
+language_model.model.layers.23.self_attn.v_proj
+language_model.model.layers.23.self_attn.o_proj
+language_model.model.layers.23.mlp
+language_model.model.layers.23.mlp.gate_proj
+language_model.model.layers.23.mlp.up_proj
+language_model.model.layers.23.mlp.down_proj
+language_model.model.layers.23.mlp.act_fn
+language_model.model.layers.23.input_layernorm
+language_model.model.layers.24
+language_model.model.layers.24.self_attn
+language_model.model.layers.24.self_attn.q_proj
+language_model.model.layers.24.self_attn.k_proj
+language_model.model.layers.24.self_attn.v_proj
+language_model.model.layers.24.self_attn.o_proj
+language_model.model.layers.24.mlp
+language_model.model.layers.24.mlp.gate_proj
+language_model.model.layers.24.mlp.up_proj
+language_model.model.layers.24.mlp.down_proj
+language_model.model.layers.24.mlp.act_fn
+language_model.model.layers.24.input_layernorm
+language_model.model.layers.25
+language_model.model.layers.25.self_attn
+language_model.model.layers.25.self_attn.q_proj
+language_model.model.layers.25.self_attn.k_proj
+language_model.model.layers.25.self_attn.v_proj
+language_model.model.layers.25.self_attn.o_proj
+language_model.model.layers.25.mlp
+language_model.model.layers.25.mlp.gate_proj
+language_model.model.layers.25.mlp.up_proj
+language_model.model.layers.25.mlp.down_proj
+language_model.model.layers.25.mlp.act_fn
+language_model.model.layers.25.input_layernorm
+language_model.model.layers.26
+language_model.model.layers.26.self_attn
+language_model.model.layers.26.self_attn.q_proj
+language_model.model.layers.26.self_attn.k_proj
+language_model.model.layers.26.self_attn.v_proj
+language_model.model.layers.26.self_attn.o_proj
+language_model.model.layers.26.mlp
+language_model.model.layers.26.mlp.gate_proj
+language_model.model.layers.26.mlp.up_proj
+language_model.model.layers.26.mlp.down_proj
+language_model.model.layers.26.mlp.act_fn
+language_model.model.layers.26.input_layernorm
+language_model.model.layers.27
+language_model.model.layers.27.self_attn
+language_model.model.layers.27.self_attn.q_proj
+language_model.model.layers.27.self_attn.k_proj
+language_model.model.layers.27.self_attn.v_proj
+language_model.model.layers.27.self_attn.o_proj
+language_model.model.layers.27.mlp
+language_model.model.layers.27.mlp.gate_proj
+language_model.model.layers.27.mlp.up_proj
+language_model.model.layers.27.mlp.down_proj
+language_model.model.layers.27.mlp.act_fn
+language_model.model.layers.27.input_layernorm
+language_model.model.layers.28
+language_model.model.layers.28.self_attn
+language_model.model.layers.28.self_attn.q_proj
+language_model.model.layers.28.self_attn.k_proj
+language_model.model.layers.28.self_attn.v_proj
+language_model.model.layers.28.self_attn.o_proj
+language_model.model.layers.28.mlp
+language_model.model.layers.28.mlp.gate_proj
+language_model.model.layers.28.mlp.up_proj
+language_model.model.layers.28.mlp.down_proj
+language_model.model.layers.28.mlp.act_fn
+language_model.model.layers.28.input_layernorm
+language_model.model.layers.29
+language_model.model.layers.29.self_attn
+language_model.model.layers.29.self_attn.q_proj
+language_model.model.layers.29.self_attn.k_proj
+language_model.model.layers.29.self_attn.v_proj
+language_model.model.layers.29.self_attn.o_proj
+language_model.model.layers.29.mlp
+language_model.model.layers.29.mlp.gate_proj
+language_model.model.layers.29.mlp.up_proj
+language_model.model.layers.29.mlp.down_proj
+language_model.model.layers.29.mlp.act_fn
+language_model.model.layers.29.input_layernorm
+language_model.model.layers.30
+language_model.model.layers.30.self_attn
+language_model.model.layers.30.self_attn.q_proj
+language_model.model.layers.30.self_attn.k_proj
+language_model.model.layers.30.self_attn.v_proj
+language_model.model.layers.30.self_attn.o_proj
+language_model.model.layers.30.mlp
+language_model.model.layers.30.mlp.gate_proj
+language_model.model.layers.30.mlp.up_proj
+language_model.model.layers.30.mlp.down_proj
+language_model.model.layers.30.mlp.act_fn
+language_model.model.layers.30.input_layernorm
+language_model.model.layers.31
+language_model.model.layers.31.self_attn
+language_model.model.layers.31.self_attn.q_proj
+language_model.model.layers.31.self_attn.k_proj
+language_model.model.layers.31.self_attn.v_proj
+language_model.model.layers.31.self_attn.o_proj
+language_model.model.layers.31.mlp
+language_model.model.layers.31.mlp.gate_proj
+language_model.model.layers.31.mlp.up_proj
+language_model.model.layers.31.mlp.down_proj
+language_model.model.layers.31.mlp.act_fn
+language_model.model.layers.31.input_layernorm
+language_model.model.norm
+language_model.model.rotary_emb
+language_model.lm_head
diff --git a/logs/MBZUAI/GLaMM-FullScope.txt b/logs/MBZUAI/GLaMM-FullScope.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6632c47b95ca96d05cabc8298741020aa63f494a
--- /dev/null
+++ b/logs/MBZUAI/GLaMM-FullScope.txt
@@ -0,0 +1,950 @@
+model
+model.embed_tokens
+model.layers
+model.layers.0
+model.layers.0.self_attn
+model.layers.0.self_attn.q_proj
+model.layers.0.self_attn.k_proj
+model.layers.0.self_attn.v_proj
+model.layers.0.self_attn.o_proj
+model.layers.0.self_attn.rotary_emb
+model.layers.0.mlp
+model.layers.0.mlp.gate_proj
+model.layers.0.mlp.down_proj
+model.layers.0.mlp.up_proj
+model.layers.0.mlp.act_fn
+model.layers.0.input_layernorm
+model.layers.0.post_attention_layernorm
+model.layers.1
+model.layers.1.self_attn
+model.layers.1.self_attn.q_proj
+model.layers.1.self_attn.k_proj
+model.layers.1.self_attn.v_proj
+model.layers.1.self_attn.o_proj
+model.layers.1.self_attn.rotary_emb
+model.layers.1.mlp
+model.layers.1.mlp.gate_proj
+model.layers.1.mlp.down_proj
+model.layers.1.mlp.up_proj
+model.layers.1.mlp.act_fn
+model.layers.1.input_layernorm
+model.layers.1.post_attention_layernorm
+model.layers.2
+model.layers.2.self_attn
+model.layers.2.self_attn.q_proj
+model.layers.2.self_attn.k_proj
+model.layers.2.self_attn.v_proj
+model.layers.2.self_attn.o_proj
+model.layers.2.self_attn.rotary_emb
+model.layers.2.mlp
+model.layers.2.mlp.gate_proj
+model.layers.2.mlp.down_proj
+model.layers.2.mlp.up_proj
+model.layers.2.mlp.act_fn
+model.layers.2.input_layernorm
+model.layers.2.post_attention_layernorm
+model.layers.3
+model.layers.3.self_attn
+model.layers.3.self_attn.q_proj
+model.layers.3.self_attn.k_proj
+model.layers.3.self_attn.v_proj
+model.layers.3.self_attn.o_proj
+model.layers.3.self_attn.rotary_emb
+model.layers.3.mlp
+model.layers.3.mlp.gate_proj
+model.layers.3.mlp.down_proj
+model.layers.3.mlp.up_proj
+model.layers.3.mlp.act_fn
+model.layers.3.input_layernorm
+model.layers.3.post_attention_layernorm
+model.layers.4
+model.layers.4.self_attn
+model.layers.4.self_attn.q_proj
+model.layers.4.self_attn.k_proj
+model.layers.4.self_attn.v_proj
+model.layers.4.self_attn.o_proj
+model.layers.4.self_attn.rotary_emb
+model.layers.4.mlp
+model.layers.4.mlp.gate_proj
+model.layers.4.mlp.down_proj
+model.layers.4.mlp.up_proj
+model.layers.4.mlp.act_fn
+model.layers.4.input_layernorm
+model.layers.4.post_attention_layernorm
+model.layers.5
+model.layers.5.self_attn
+model.layers.5.self_attn.q_proj
+model.layers.5.self_attn.k_proj
+model.layers.5.self_attn.v_proj
+model.layers.5.self_attn.o_proj
+model.layers.5.self_attn.rotary_emb
+model.layers.5.mlp
+model.layers.5.mlp.gate_proj
+model.layers.5.mlp.down_proj
+model.layers.5.mlp.up_proj
+model.layers.5.mlp.act_fn
+model.layers.5.input_layernorm
+model.layers.5.post_attention_layernorm
+model.layers.6
+model.layers.6.self_attn
+model.layers.6.self_attn.q_proj
+model.layers.6.self_attn.k_proj
+model.layers.6.self_attn.v_proj
+model.layers.6.self_attn.o_proj
+model.layers.6.self_attn.rotary_emb
+model.layers.6.mlp
+model.layers.6.mlp.gate_proj
+model.layers.6.mlp.down_proj
+model.layers.6.mlp.up_proj
+model.layers.6.mlp.act_fn
+model.layers.6.input_layernorm
+model.layers.6.post_attention_layernorm
+model.layers.7
+model.layers.7.self_attn
+model.layers.7.self_attn.q_proj
+model.layers.7.self_attn.k_proj
+model.layers.7.self_attn.v_proj
+model.layers.7.self_attn.o_proj
+model.layers.7.self_attn.rotary_emb
+model.layers.7.mlp
+model.layers.7.mlp.gate_proj
+model.layers.7.mlp.down_proj
+model.layers.7.mlp.up_proj
+model.layers.7.mlp.act_fn
+model.layers.7.input_layernorm
+model.layers.7.post_attention_layernorm
+model.layers.8
+model.layers.8.self_attn
+model.layers.8.self_attn.q_proj
+model.layers.8.self_attn.k_proj
+model.layers.8.self_attn.v_proj
+model.layers.8.self_attn.o_proj
+model.layers.8.self_attn.rotary_emb
+model.layers.8.mlp
+model.layers.8.mlp.gate_proj
+model.layers.8.mlp.down_proj
+model.layers.8.mlp.up_proj
+model.layers.8.mlp.act_fn
+model.layers.8.input_layernorm
+model.layers.8.post_attention_layernorm
+model.layers.9
+model.layers.9.self_attn
+model.layers.9.self_attn.q_proj
+model.layers.9.self_attn.k_proj
+model.layers.9.self_attn.v_proj
+model.layers.9.self_attn.o_proj
+model.layers.9.self_attn.rotary_emb
+model.layers.9.mlp
+model.layers.9.mlp.gate_proj
+model.layers.9.mlp.down_proj
+model.layers.9.mlp.up_proj
+model.layers.9.mlp.act_fn
+model.layers.9.input_layernorm
+model.layers.9.post_attention_layernorm
+model.layers.10
+model.layers.10.self_attn
+model.layers.10.self_attn.q_proj
+model.layers.10.self_attn.k_proj
+model.layers.10.self_attn.v_proj
+model.layers.10.self_attn.o_proj
+model.layers.10.self_attn.rotary_emb
+model.layers.10.mlp
+model.layers.10.mlp.gate_proj
+model.layers.10.mlp.down_proj
+model.layers.10.mlp.up_proj
+model.layers.10.mlp.act_fn
+model.layers.10.input_layernorm
+model.layers.10.post_attention_layernorm
+model.layers.11
+model.layers.11.self_attn
+model.layers.11.self_attn.q_proj
+model.layers.11.self_attn.k_proj
+model.layers.11.self_attn.v_proj
+model.layers.11.self_attn.o_proj
+model.layers.11.self_attn.rotary_emb
+model.layers.11.mlp
+model.layers.11.mlp.gate_proj
+model.layers.11.mlp.down_proj
+model.layers.11.mlp.up_proj
+model.layers.11.mlp.act_fn
+model.layers.11.input_layernorm
+model.layers.11.post_attention_layernorm
+model.layers.12
+model.layers.12.self_attn
+model.layers.12.self_attn.q_proj
+model.layers.12.self_attn.k_proj
+model.layers.12.self_attn.v_proj
+model.layers.12.self_attn.o_proj
+model.layers.12.self_attn.rotary_emb
+model.layers.12.mlp
+model.layers.12.mlp.gate_proj
+model.layers.12.mlp.down_proj
+model.layers.12.mlp.up_proj
+model.layers.12.mlp.act_fn
+model.layers.12.input_layernorm
+model.layers.12.post_attention_layernorm
+model.layers.13
+model.layers.13.self_attn
+model.layers.13.self_attn.q_proj
+model.layers.13.self_attn.k_proj
+model.layers.13.self_attn.v_proj
+model.layers.13.self_attn.o_proj
+model.layers.13.self_attn.rotary_emb
+model.layers.13.mlp
+model.layers.13.mlp.gate_proj
+model.layers.13.mlp.down_proj
+model.layers.13.mlp.up_proj
+model.layers.13.mlp.act_fn
+model.layers.13.input_layernorm
+model.layers.13.post_attention_layernorm
+model.layers.14
+model.layers.14.self_attn
+model.layers.14.self_attn.q_proj
+model.layers.14.self_attn.k_proj
+model.layers.14.self_attn.v_proj
+model.layers.14.self_attn.o_proj
+model.layers.14.self_attn.rotary_emb
+model.layers.14.mlp
+model.layers.14.mlp.gate_proj
+model.layers.14.mlp.down_proj
+model.layers.14.mlp.up_proj
+model.layers.14.mlp.act_fn
+model.layers.14.input_layernorm
+model.layers.14.post_attention_layernorm
+model.layers.15
+model.layers.15.self_attn
+model.layers.15.self_attn.q_proj
+model.layers.15.self_attn.k_proj
+model.layers.15.self_attn.v_proj
+model.layers.15.self_attn.o_proj
+model.layers.15.self_attn.rotary_emb
+model.layers.15.mlp
+model.layers.15.mlp.gate_proj
+model.layers.15.mlp.down_proj
+model.layers.15.mlp.up_proj
+model.layers.15.mlp.act_fn
+model.layers.15.input_layernorm
+model.layers.15.post_attention_layernorm
+model.layers.16
+model.layers.16.self_attn
+model.layers.16.self_attn.q_proj
+model.layers.16.self_attn.k_proj
+model.layers.16.self_attn.v_proj
+model.layers.16.self_attn.o_proj
+model.layers.16.self_attn.rotary_emb
+model.layers.16.mlp
+model.layers.16.mlp.gate_proj
+model.layers.16.mlp.down_proj
+model.layers.16.mlp.up_proj
+model.layers.16.mlp.act_fn
+model.layers.16.input_layernorm
+model.layers.16.post_attention_layernorm
+model.layers.17
+model.layers.17.self_attn
+model.layers.17.self_attn.q_proj
+model.layers.17.self_attn.k_proj
+model.layers.17.self_attn.v_proj
+model.layers.17.self_attn.o_proj
+model.layers.17.self_attn.rotary_emb
+model.layers.17.mlp
+model.layers.17.mlp.gate_proj
+model.layers.17.mlp.down_proj
+model.layers.17.mlp.up_proj
+model.layers.17.mlp.act_fn
+model.layers.17.input_layernorm
+model.layers.17.post_attention_layernorm
+model.layers.18
+model.layers.18.self_attn
+model.layers.18.self_attn.q_proj
+model.layers.18.self_attn.k_proj
+model.layers.18.self_attn.v_proj
+model.layers.18.self_attn.o_proj
+model.layers.18.self_attn.rotary_emb
+model.layers.18.mlp
+model.layers.18.mlp.gate_proj
+model.layers.18.mlp.down_proj
+model.layers.18.mlp.up_proj
+model.layers.18.mlp.act_fn
+model.layers.18.input_layernorm
+model.layers.18.post_attention_layernorm
+model.layers.19
+model.layers.19.self_attn
+model.layers.19.self_attn.q_proj
+model.layers.19.self_attn.k_proj
+model.layers.19.self_attn.v_proj
+model.layers.19.self_attn.o_proj
+model.layers.19.self_attn.rotary_emb
+model.layers.19.mlp
+model.layers.19.mlp.gate_proj
+model.layers.19.mlp.down_proj
+model.layers.19.mlp.up_proj
+model.layers.19.mlp.act_fn
+model.layers.19.input_layernorm
+model.layers.19.post_attention_layernorm
+model.layers.20
+model.layers.20.self_attn
+model.layers.20.self_attn.q_proj
+model.layers.20.self_attn.k_proj
+model.layers.20.self_attn.v_proj
+model.layers.20.self_attn.o_proj
+model.layers.20.self_attn.rotary_emb
+model.layers.20.mlp
+model.layers.20.mlp.gate_proj
+model.layers.20.mlp.down_proj
+model.layers.20.mlp.up_proj
+model.layers.20.mlp.act_fn
+model.layers.20.input_layernorm
+model.layers.20.post_attention_layernorm
+model.layers.21
+model.layers.21.self_attn
+model.layers.21.self_attn.q_proj
+model.layers.21.self_attn.k_proj
+model.layers.21.self_attn.v_proj
+model.layers.21.self_attn.o_proj
+model.layers.21.self_attn.rotary_emb
+model.layers.21.mlp
+model.layers.21.mlp.gate_proj
+model.layers.21.mlp.down_proj
+model.layers.21.mlp.up_proj
+model.layers.21.mlp.act_fn
+model.layers.21.input_layernorm
+model.layers.21.post_attention_layernorm
+model.layers.22
+model.layers.22.self_attn
+model.layers.22.self_attn.q_proj
+model.layers.22.self_attn.k_proj
+model.layers.22.self_attn.v_proj
+model.layers.22.self_attn.o_proj
+model.layers.22.self_attn.rotary_emb
+model.layers.22.mlp
+model.layers.22.mlp.gate_proj
+model.layers.22.mlp.down_proj
+model.layers.22.mlp.up_proj
+model.layers.22.mlp.act_fn
+model.layers.22.input_layernorm
+model.layers.22.post_attention_layernorm
+model.layers.23
+model.layers.23.self_attn
+model.layers.23.self_attn.q_proj
+model.layers.23.self_attn.k_proj
+model.layers.23.self_attn.v_proj
+model.layers.23.self_attn.o_proj
+model.layers.23.self_attn.rotary_emb
+model.layers.23.mlp
+model.layers.23.mlp.gate_proj
+model.layers.23.mlp.down_proj
+model.layers.23.mlp.up_proj
+model.layers.23.mlp.act_fn
+model.layers.23.input_layernorm
+model.layers.23.post_attention_layernorm
+model.layers.24
+model.layers.24.self_attn
+model.layers.24.self_attn.q_proj
+model.layers.24.self_attn.k_proj
+model.layers.24.self_attn.v_proj
+model.layers.24.self_attn.o_proj
+model.layers.24.self_attn.rotary_emb
+model.layers.24.mlp
+model.layers.24.mlp.gate_proj
+model.layers.24.mlp.down_proj
+model.layers.24.mlp.up_proj
+model.layers.24.mlp.act_fn
+model.layers.24.input_layernorm
+model.layers.24.post_attention_layernorm
+model.layers.25
+model.layers.25.self_attn
+model.layers.25.self_attn.q_proj
+model.layers.25.self_attn.k_proj
+model.layers.25.self_attn.v_proj
+model.layers.25.self_attn.o_proj
+model.layers.25.self_attn.rotary_emb
+model.layers.25.mlp
+model.layers.25.mlp.gate_proj
+model.layers.25.mlp.down_proj
+model.layers.25.mlp.up_proj
+model.layers.25.mlp.act_fn
+model.layers.25.input_layernorm
+model.layers.25.post_attention_layernorm
+model.layers.26
+model.layers.26.self_attn
+model.layers.26.self_attn.q_proj
+model.layers.26.self_attn.k_proj
+model.layers.26.self_attn.v_proj
+model.layers.26.self_attn.o_proj
+model.layers.26.self_attn.rotary_emb
+model.layers.26.mlp
+model.layers.26.mlp.gate_proj
+model.layers.26.mlp.down_proj
+model.layers.26.mlp.up_proj
+model.layers.26.mlp.act_fn
+model.layers.26.input_layernorm
+model.layers.26.post_attention_layernorm
+model.layers.27
+model.layers.27.self_attn
+model.layers.27.self_attn.q_proj
+model.layers.27.self_attn.k_proj
+model.layers.27.self_attn.v_proj
+model.layers.27.self_attn.o_proj
+model.layers.27.self_attn.rotary_emb
+model.layers.27.mlp
+model.layers.27.mlp.gate_proj
+model.layers.27.mlp.down_proj
+model.layers.27.mlp.up_proj
+model.layers.27.mlp.act_fn
+model.layers.27.input_layernorm
+model.layers.27.post_attention_layernorm
+model.layers.28
+model.layers.28.self_attn
+model.layers.28.self_attn.q_proj
+model.layers.28.self_attn.k_proj
+model.layers.28.self_attn.v_proj
+model.layers.28.self_attn.o_proj
+model.layers.28.self_attn.rotary_emb
+model.layers.28.mlp
+model.layers.28.mlp.gate_proj
+model.layers.28.mlp.down_proj
+model.layers.28.mlp.up_proj
+model.layers.28.mlp.act_fn
+model.layers.28.input_layernorm
+model.layers.28.post_attention_layernorm
+model.layers.29
+model.layers.29.self_attn
+model.layers.29.self_attn.q_proj
+model.layers.29.self_attn.k_proj
+model.layers.29.self_attn.v_proj
+model.layers.29.self_attn.o_proj
+model.layers.29.self_attn.rotary_emb
+model.layers.29.mlp
+model.layers.29.mlp.gate_proj
+model.layers.29.mlp.down_proj
+model.layers.29.mlp.up_proj
+model.layers.29.mlp.act_fn
+model.layers.29.input_layernorm
+model.layers.29.post_attention_layernorm
+model.layers.30
+model.layers.30.self_attn
+model.layers.30.self_attn.q_proj
+model.layers.30.self_attn.k_proj
+model.layers.30.self_attn.v_proj
+model.layers.30.self_attn.o_proj
+model.layers.30.self_attn.rotary_emb
+model.layers.30.mlp
+model.layers.30.mlp.gate_proj
+model.layers.30.mlp.down_proj
+model.layers.30.mlp.up_proj
+model.layers.30.mlp.act_fn
+model.layers.30.input_layernorm
+model.layers.30.post_attention_layernorm
+model.layers.31
+model.layers.31.self_attn
+model.layers.31.self_attn.q_proj
+model.layers.31.self_attn.k_proj
+model.layers.31.self_attn.v_proj
+model.layers.31.self_attn.o_proj
+model.layers.31.self_attn.rotary_emb
+model.layers.31.mlp
+model.layers.31.mlp.gate_proj
+model.layers.31.mlp.down_proj
+model.layers.31.mlp.up_proj
+model.layers.31.mlp.act_fn
+model.layers.31.input_layernorm
+model.layers.31.post_attention_layernorm
+model.norm
+model.vision_tower
+model.mm_projector
+model.mm_projector.0
+model.mm_projector.1
+model.mm_projector.2
+model.region_encoder
+model.region_encoder.mlvl_fuse
+model.region_encoder.mlvl_fuse.input_conv
+model.region_encoder.mlvl_fuse.input_conv.0
+model.region_encoder.mlvl_fuse.input_conv.1
+model.region_encoder.mlvl_fuse.input_conv.2
+model.region_encoder.mlvl_fuse.input_conv.3
+model.region_encoder.mlvl_fuse.fuse_convs
+model.region_encoder.mlvl_fuse.fuse_convs.0
+model.region_encoder.mlvl_fuse.fuse_convs.0.conv
+model.region_encoder.mlvl_fuse.fuse_convs.0.gn
+model.region_encoder.mlvl_fuse.fuse_convs.0.activate
+model.region_encoder.mlvl_fuse.fuse_convs.1
+model.region_encoder.mlvl_fuse.fuse_convs.1.conv
+model.region_encoder.mlvl_fuse.fuse_convs.1.gn
+model.region_encoder.mlvl_fuse.fuse_convs.1.activate
+model.region_encoder.mlvl_fuse.fuse_convs.2
+model.region_encoder.mlvl_fuse.fuse_convs.2.conv
+model.region_encoder.mlvl_fuse.fuse_convs.2.gn
+model.region_encoder.mlvl_fuse.fuse_convs.2.activate
+model.region_encoder.mlvl_fuse.fuse_convs.3
+model.region_encoder.mlvl_fuse.fuse_convs.3.conv
+model.region_encoder.mlvl_fuse.fuse_convs.3.gn
+model.region_encoder.mlvl_fuse.fuse_convs.3.activate
+model.region_encoder.mlvl_fuse.fuse_convs.4
+model.region_encoder.mlvl_fuse.fuse_convs.4.conv
+model.region_encoder.mlvl_fuse.fuse_convs.4.gn
+model.region_encoder.mlvl_fuse.fuse_convs.4.activate
+model.region_encoder.roi_align
+model.region_encoder.roi_align.roi_layers
+model.region_encoder.roi_align.roi_layers.0
+model.region_encoder.roi_align.roi_layers.1
+model.region_encoder.roi_align.roi_layers.2
+model.region_encoder.roi_align.roi_layers.3
+model.region_encoder.roi_align.pconvs
+model.region_encoder.roi_align.pconvs.0
+model.region_encoder.roi_align.pconvs.1
+model.region_encoder.roi_align.pconvs.2
+model.region_encoder.roi_align.pconvs.3
+model.region_encoder.roi_align.pos_embedd
+model.region_encoder.roi_align.pos_embedd.0
+model.region_encoder.roi_align.pos_embedd.1
+model.region_encoder.roi_align.pos_embedd.2
+model.region_encoder.roi_align.pos_embedd.3
+model.region_encoder.roi_align.pos_embedd.4
+model.region_encoder.roi_align.pos_embedd.5
+model.region_encoder.roi_align.updims
+model.region_encoder.roi_align.flatten_linear
+model.grounding_encoder
+model.grounding_encoder.image_encoder
+model.grounding_encoder.image_encoder.patch_embed
+model.grounding_encoder.image_encoder.patch_embed.proj
+model.grounding_encoder.image_encoder.blocks
+model.grounding_encoder.image_encoder.blocks.0
+model.grounding_encoder.image_encoder.blocks.0.norm1
+model.grounding_encoder.image_encoder.blocks.0.attn
+model.grounding_encoder.image_encoder.blocks.0.attn.qkv
+model.grounding_encoder.image_encoder.blocks.0.attn.proj
+model.grounding_encoder.image_encoder.blocks.0.norm2
+model.grounding_encoder.image_encoder.blocks.0.mlp
+model.grounding_encoder.image_encoder.blocks.0.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.0.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.0.mlp.act
+model.grounding_encoder.image_encoder.blocks.1
+model.grounding_encoder.image_encoder.blocks.1.norm1
+model.grounding_encoder.image_encoder.blocks.1.attn
+model.grounding_encoder.image_encoder.blocks.1.attn.qkv
+model.grounding_encoder.image_encoder.blocks.1.attn.proj
+model.grounding_encoder.image_encoder.blocks.1.norm2
+model.grounding_encoder.image_encoder.blocks.1.mlp
+model.grounding_encoder.image_encoder.blocks.1.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.1.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.1.mlp.act
+model.grounding_encoder.image_encoder.blocks.2
+model.grounding_encoder.image_encoder.blocks.2.norm1
+model.grounding_encoder.image_encoder.blocks.2.attn
+model.grounding_encoder.image_encoder.blocks.2.attn.qkv
+model.grounding_encoder.image_encoder.blocks.2.attn.proj
+model.grounding_encoder.image_encoder.blocks.2.norm2
+model.grounding_encoder.image_encoder.blocks.2.mlp
+model.grounding_encoder.image_encoder.blocks.2.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.2.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.2.mlp.act
+model.grounding_encoder.image_encoder.blocks.3
+model.grounding_encoder.image_encoder.blocks.3.norm1
+model.grounding_encoder.image_encoder.blocks.3.attn
+model.grounding_encoder.image_encoder.blocks.3.attn.qkv
+model.grounding_encoder.image_encoder.blocks.3.attn.proj
+model.grounding_encoder.image_encoder.blocks.3.norm2
+model.grounding_encoder.image_encoder.blocks.3.mlp
+model.grounding_encoder.image_encoder.blocks.3.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.3.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.3.mlp.act
+model.grounding_encoder.image_encoder.blocks.4
+model.grounding_encoder.image_encoder.blocks.4.norm1
+model.grounding_encoder.image_encoder.blocks.4.attn
+model.grounding_encoder.image_encoder.blocks.4.attn.qkv
+model.grounding_encoder.image_encoder.blocks.4.attn.proj
+model.grounding_encoder.image_encoder.blocks.4.norm2
+model.grounding_encoder.image_encoder.blocks.4.mlp
+model.grounding_encoder.image_encoder.blocks.4.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.4.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.4.mlp.act
+model.grounding_encoder.image_encoder.blocks.5
+model.grounding_encoder.image_encoder.blocks.5.norm1
+model.grounding_encoder.image_encoder.blocks.5.attn
+model.grounding_encoder.image_encoder.blocks.5.attn.qkv
+model.grounding_encoder.image_encoder.blocks.5.attn.proj
+model.grounding_encoder.image_encoder.blocks.5.norm2
+model.grounding_encoder.image_encoder.blocks.5.mlp
+model.grounding_encoder.image_encoder.blocks.5.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.5.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.5.mlp.act
+model.grounding_encoder.image_encoder.blocks.6
+model.grounding_encoder.image_encoder.blocks.6.norm1
+model.grounding_encoder.image_encoder.blocks.6.attn
+model.grounding_encoder.image_encoder.blocks.6.attn.qkv
+model.grounding_encoder.image_encoder.blocks.6.attn.proj
+model.grounding_encoder.image_encoder.blocks.6.norm2
+model.grounding_encoder.image_encoder.blocks.6.mlp
+model.grounding_encoder.image_encoder.blocks.6.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.6.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.6.mlp.act
+model.grounding_encoder.image_encoder.blocks.7
+model.grounding_encoder.image_encoder.blocks.7.norm1
+model.grounding_encoder.image_encoder.blocks.7.attn
+model.grounding_encoder.image_encoder.blocks.7.attn.qkv
+model.grounding_encoder.image_encoder.blocks.7.attn.proj
+model.grounding_encoder.image_encoder.blocks.7.norm2
+model.grounding_encoder.image_encoder.blocks.7.mlp
+model.grounding_encoder.image_encoder.blocks.7.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.7.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.7.mlp.act
+model.grounding_encoder.image_encoder.blocks.8
+model.grounding_encoder.image_encoder.blocks.8.norm1
+model.grounding_encoder.image_encoder.blocks.8.attn
+model.grounding_encoder.image_encoder.blocks.8.attn.qkv
+model.grounding_encoder.image_encoder.blocks.8.attn.proj
+model.grounding_encoder.image_encoder.blocks.8.norm2
+model.grounding_encoder.image_encoder.blocks.8.mlp
+model.grounding_encoder.image_encoder.blocks.8.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.8.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.8.mlp.act
+model.grounding_encoder.image_encoder.blocks.9
+model.grounding_encoder.image_encoder.blocks.9.norm1
+model.grounding_encoder.image_encoder.blocks.9.attn
+model.grounding_encoder.image_encoder.blocks.9.attn.qkv
+model.grounding_encoder.image_encoder.blocks.9.attn.proj
+model.grounding_encoder.image_encoder.blocks.9.norm2
+model.grounding_encoder.image_encoder.blocks.9.mlp
+model.grounding_encoder.image_encoder.blocks.9.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.9.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.9.mlp.act
+model.grounding_encoder.image_encoder.blocks.10
+model.grounding_encoder.image_encoder.blocks.10.norm1
+model.grounding_encoder.image_encoder.blocks.10.attn
+model.grounding_encoder.image_encoder.blocks.10.attn.qkv
+model.grounding_encoder.image_encoder.blocks.10.attn.proj
+model.grounding_encoder.image_encoder.blocks.10.norm2
+model.grounding_encoder.image_encoder.blocks.10.mlp
+model.grounding_encoder.image_encoder.blocks.10.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.10.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.10.mlp.act
+model.grounding_encoder.image_encoder.blocks.11
+model.grounding_encoder.image_encoder.blocks.11.norm1
+model.grounding_encoder.image_encoder.blocks.11.attn
+model.grounding_encoder.image_encoder.blocks.11.attn.qkv
+model.grounding_encoder.image_encoder.blocks.11.attn.proj
+model.grounding_encoder.image_encoder.blocks.11.norm2
+model.grounding_encoder.image_encoder.blocks.11.mlp
+model.grounding_encoder.image_encoder.blocks.11.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.11.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.11.mlp.act
+model.grounding_encoder.image_encoder.blocks.12
+model.grounding_encoder.image_encoder.blocks.12.norm1
+model.grounding_encoder.image_encoder.blocks.12.attn
+model.grounding_encoder.image_encoder.blocks.12.attn.qkv
+model.grounding_encoder.image_encoder.blocks.12.attn.proj
+model.grounding_encoder.image_encoder.blocks.12.norm2
+model.grounding_encoder.image_encoder.blocks.12.mlp
+model.grounding_encoder.image_encoder.blocks.12.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.12.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.12.mlp.act
+model.grounding_encoder.image_encoder.blocks.13
+model.grounding_encoder.image_encoder.blocks.13.norm1
+model.grounding_encoder.image_encoder.blocks.13.attn
+model.grounding_encoder.image_encoder.blocks.13.attn.qkv
+model.grounding_encoder.image_encoder.blocks.13.attn.proj
+model.grounding_encoder.image_encoder.blocks.13.norm2
+model.grounding_encoder.image_encoder.blocks.13.mlp
+model.grounding_encoder.image_encoder.blocks.13.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.13.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.13.mlp.act
+model.grounding_encoder.image_encoder.blocks.14
+model.grounding_encoder.image_encoder.blocks.14.norm1
+model.grounding_encoder.image_encoder.blocks.14.attn
+model.grounding_encoder.image_encoder.blocks.14.attn.qkv
+model.grounding_encoder.image_encoder.blocks.14.attn.proj
+model.grounding_encoder.image_encoder.blocks.14.norm2
+model.grounding_encoder.image_encoder.blocks.14.mlp
+model.grounding_encoder.image_encoder.blocks.14.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.14.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.14.mlp.act
+model.grounding_encoder.image_encoder.blocks.15
+model.grounding_encoder.image_encoder.blocks.15.norm1
+model.grounding_encoder.image_encoder.blocks.15.attn
+model.grounding_encoder.image_encoder.blocks.15.attn.qkv
+model.grounding_encoder.image_encoder.blocks.15.attn.proj
+model.grounding_encoder.image_encoder.blocks.15.norm2
+model.grounding_encoder.image_encoder.blocks.15.mlp
+model.grounding_encoder.image_encoder.blocks.15.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.15.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.15.mlp.act
+model.grounding_encoder.image_encoder.blocks.16
+model.grounding_encoder.image_encoder.blocks.16.norm1
+model.grounding_encoder.image_encoder.blocks.16.attn
+model.grounding_encoder.image_encoder.blocks.16.attn.qkv
+model.grounding_encoder.image_encoder.blocks.16.attn.proj
+model.grounding_encoder.image_encoder.blocks.16.norm2
+model.grounding_encoder.image_encoder.blocks.16.mlp
+model.grounding_encoder.image_encoder.blocks.16.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.16.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.16.mlp.act
+model.grounding_encoder.image_encoder.blocks.17
+model.grounding_encoder.image_encoder.blocks.17.norm1
+model.grounding_encoder.image_encoder.blocks.17.attn
+model.grounding_encoder.image_encoder.blocks.17.attn.qkv
+model.grounding_encoder.image_encoder.blocks.17.attn.proj
+model.grounding_encoder.image_encoder.blocks.17.norm2
+model.grounding_encoder.image_encoder.blocks.17.mlp
+model.grounding_encoder.image_encoder.blocks.17.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.17.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.17.mlp.act
+model.grounding_encoder.image_encoder.blocks.18
+model.grounding_encoder.image_encoder.blocks.18.norm1
+model.grounding_encoder.image_encoder.blocks.18.attn
+model.grounding_encoder.image_encoder.blocks.18.attn.qkv
+model.grounding_encoder.image_encoder.blocks.18.attn.proj
+model.grounding_encoder.image_encoder.blocks.18.norm2
+model.grounding_encoder.image_encoder.blocks.18.mlp
+model.grounding_encoder.image_encoder.blocks.18.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.18.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.18.mlp.act
+model.grounding_encoder.image_encoder.blocks.19
+model.grounding_encoder.image_encoder.blocks.19.norm1
+model.grounding_encoder.image_encoder.blocks.19.attn
+model.grounding_encoder.image_encoder.blocks.19.attn.qkv
+model.grounding_encoder.image_encoder.blocks.19.attn.proj
+model.grounding_encoder.image_encoder.blocks.19.norm2
+model.grounding_encoder.image_encoder.blocks.19.mlp
+model.grounding_encoder.image_encoder.blocks.19.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.19.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.19.mlp.act
+model.grounding_encoder.image_encoder.blocks.20
+model.grounding_encoder.image_encoder.blocks.20.norm1
+model.grounding_encoder.image_encoder.blocks.20.attn
+model.grounding_encoder.image_encoder.blocks.20.attn.qkv
+model.grounding_encoder.image_encoder.blocks.20.attn.proj
+model.grounding_encoder.image_encoder.blocks.20.norm2
+model.grounding_encoder.image_encoder.blocks.20.mlp
+model.grounding_encoder.image_encoder.blocks.20.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.20.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.20.mlp.act
+model.grounding_encoder.image_encoder.blocks.21
+model.grounding_encoder.image_encoder.blocks.21.norm1
+model.grounding_encoder.image_encoder.blocks.21.attn
+model.grounding_encoder.image_encoder.blocks.21.attn.qkv
+model.grounding_encoder.image_encoder.blocks.21.attn.proj
+model.grounding_encoder.image_encoder.blocks.21.norm2
+model.grounding_encoder.image_encoder.blocks.21.mlp
+model.grounding_encoder.image_encoder.blocks.21.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.21.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.21.mlp.act
+model.grounding_encoder.image_encoder.blocks.22
+model.grounding_encoder.image_encoder.blocks.22.norm1
+model.grounding_encoder.image_encoder.blocks.22.attn
+model.grounding_encoder.image_encoder.blocks.22.attn.qkv
+model.grounding_encoder.image_encoder.blocks.22.attn.proj
+model.grounding_encoder.image_encoder.blocks.22.norm2
+model.grounding_encoder.image_encoder.blocks.22.mlp
+model.grounding_encoder.image_encoder.blocks.22.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.22.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.22.mlp.act
+model.grounding_encoder.image_encoder.blocks.23
+model.grounding_encoder.image_encoder.blocks.23.norm1
+model.grounding_encoder.image_encoder.blocks.23.attn
+model.grounding_encoder.image_encoder.blocks.23.attn.qkv
+model.grounding_encoder.image_encoder.blocks.23.attn.proj
+model.grounding_encoder.image_encoder.blocks.23.norm2
+model.grounding_encoder.image_encoder.blocks.23.mlp
+model.grounding_encoder.image_encoder.blocks.23.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.23.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.23.mlp.act
+model.grounding_encoder.image_encoder.blocks.24
+model.grounding_encoder.image_encoder.blocks.24.norm1
+model.grounding_encoder.image_encoder.blocks.24.attn
+model.grounding_encoder.image_encoder.blocks.24.attn.qkv
+model.grounding_encoder.image_encoder.blocks.24.attn.proj
+model.grounding_encoder.image_encoder.blocks.24.norm2
+model.grounding_encoder.image_encoder.blocks.24.mlp
+model.grounding_encoder.image_encoder.blocks.24.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.24.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.24.mlp.act
+model.grounding_encoder.image_encoder.blocks.25
+model.grounding_encoder.image_encoder.blocks.25.norm1
+model.grounding_encoder.image_encoder.blocks.25.attn
+model.grounding_encoder.image_encoder.blocks.25.attn.qkv
+model.grounding_encoder.image_encoder.blocks.25.attn.proj
+model.grounding_encoder.image_encoder.blocks.25.norm2
+model.grounding_encoder.image_encoder.blocks.25.mlp
+model.grounding_encoder.image_encoder.blocks.25.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.25.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.25.mlp.act
+model.grounding_encoder.image_encoder.blocks.26
+model.grounding_encoder.image_encoder.blocks.26.norm1
+model.grounding_encoder.image_encoder.blocks.26.attn
+model.grounding_encoder.image_encoder.blocks.26.attn.qkv
+model.grounding_encoder.image_encoder.blocks.26.attn.proj
+model.grounding_encoder.image_encoder.blocks.26.norm2
+model.grounding_encoder.image_encoder.blocks.26.mlp
+model.grounding_encoder.image_encoder.blocks.26.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.26.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.26.mlp.act
+model.grounding_encoder.image_encoder.blocks.27
+model.grounding_encoder.image_encoder.blocks.27.norm1
+model.grounding_encoder.image_encoder.blocks.27.attn
+model.grounding_encoder.image_encoder.blocks.27.attn.qkv
+model.grounding_encoder.image_encoder.blocks.27.attn.proj
+model.grounding_encoder.image_encoder.blocks.27.norm2
+model.grounding_encoder.image_encoder.blocks.27.mlp
+model.grounding_encoder.image_encoder.blocks.27.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.27.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.27.mlp.act
+model.grounding_encoder.image_encoder.blocks.28
+model.grounding_encoder.image_encoder.blocks.28.norm1
+model.grounding_encoder.image_encoder.blocks.28.attn
+model.grounding_encoder.image_encoder.blocks.28.attn.qkv
+model.grounding_encoder.image_encoder.blocks.28.attn.proj
+model.grounding_encoder.image_encoder.blocks.28.norm2
+model.grounding_encoder.image_encoder.blocks.28.mlp
+model.grounding_encoder.image_encoder.blocks.28.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.28.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.28.mlp.act
+model.grounding_encoder.image_encoder.blocks.29
+model.grounding_encoder.image_encoder.blocks.29.norm1
+model.grounding_encoder.image_encoder.blocks.29.attn
+model.grounding_encoder.image_encoder.blocks.29.attn.qkv
+model.grounding_encoder.image_encoder.blocks.29.attn.proj
+model.grounding_encoder.image_encoder.blocks.29.norm2
+model.grounding_encoder.image_encoder.blocks.29.mlp
+model.grounding_encoder.image_encoder.blocks.29.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.29.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.29.mlp.act
+model.grounding_encoder.image_encoder.blocks.30
+model.grounding_encoder.image_encoder.blocks.30.norm1
+model.grounding_encoder.image_encoder.blocks.30.attn
+model.grounding_encoder.image_encoder.blocks.30.attn.qkv
+model.grounding_encoder.image_encoder.blocks.30.attn.proj
+model.grounding_encoder.image_encoder.blocks.30.norm2
+model.grounding_encoder.image_encoder.blocks.30.mlp
+model.grounding_encoder.image_encoder.blocks.30.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.30.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.30.mlp.act
+model.grounding_encoder.image_encoder.blocks.31
+model.grounding_encoder.image_encoder.blocks.31.norm1
+model.grounding_encoder.image_encoder.blocks.31.attn
+model.grounding_encoder.image_encoder.blocks.31.attn.qkv
+model.grounding_encoder.image_encoder.blocks.31.attn.proj
+model.grounding_encoder.image_encoder.blocks.31.norm2
+model.grounding_encoder.image_encoder.blocks.31.mlp
+model.grounding_encoder.image_encoder.blocks.31.mlp.lin1
+model.grounding_encoder.image_encoder.blocks.31.mlp.lin2
+model.grounding_encoder.image_encoder.blocks.31.mlp.act
+model.grounding_encoder.image_encoder.neck
+model.grounding_encoder.image_encoder.neck.0
+model.grounding_encoder.image_encoder.neck.1
+model.grounding_encoder.image_encoder.neck.2
+model.grounding_encoder.image_encoder.neck.3
+model.grounding_encoder.prompt_encoder
+model.grounding_encoder.prompt_encoder.pe_layer
+model.grounding_encoder.prompt_encoder.point_embeddings
+model.grounding_encoder.prompt_encoder.point_embeddings.0
+model.grounding_encoder.prompt_encoder.point_embeddings.1
+model.grounding_encoder.prompt_encoder.point_embeddings.2
+model.grounding_encoder.prompt_encoder.point_embeddings.3
+model.grounding_encoder.prompt_encoder.not_a_point_embed
+model.grounding_encoder.prompt_encoder.mask_downscaling
+model.grounding_encoder.prompt_encoder.mask_downscaling.0
+model.grounding_encoder.prompt_encoder.mask_downscaling.1
+model.grounding_encoder.prompt_encoder.mask_downscaling.2
+model.grounding_encoder.prompt_encoder.mask_downscaling.3
+model.grounding_encoder.prompt_encoder.mask_downscaling.4
+model.grounding_encoder.prompt_encoder.mask_downscaling.5
+model.grounding_encoder.prompt_encoder.mask_downscaling.6
+model.grounding_encoder.prompt_encoder.no_mask_embed
+model.grounding_encoder.mask_decoder
+model.grounding_encoder.mask_decoder.transformer
+model.grounding_encoder.mask_decoder.transformer.layers
+model.grounding_encoder.mask_decoder.transformer.layers.0
+model.grounding_encoder.mask_decoder.transformer.layers.0.self_attn
+model.grounding_encoder.mask_decoder.transformer.layers.0.self_attn.q_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.self_attn.k_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.self_attn.v_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.self_attn.out_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.norm1
+model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_token_to_image
+model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_token_to_image.q_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_token_to_image.k_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_token_to_image.v_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_token_to_image.out_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.norm2
+model.grounding_encoder.mask_decoder.transformer.layers.0.mlp
+model.grounding_encoder.mask_decoder.transformer.layers.0.mlp.lin1
+model.grounding_encoder.mask_decoder.transformer.layers.0.mlp.lin2
+model.grounding_encoder.mask_decoder.transformer.layers.0.mlp.act
+model.grounding_encoder.mask_decoder.transformer.layers.0.norm3
+model.grounding_encoder.mask_decoder.transformer.layers.0.norm4
+model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_image_to_token
+model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_image_to_token.q_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_image_to_token.k_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_image_to_token.v_proj
+model.grounding_encoder.mask_decoder.transformer.layers.0.cross_attn_image_to_token.out_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1
+model.grounding_encoder.mask_decoder.transformer.layers.1.self_attn
+model.grounding_encoder.mask_decoder.transformer.layers.1.self_attn.q_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.self_attn.k_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.self_attn.v_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.self_attn.out_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.norm1
+model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_token_to_image
+model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_token_to_image.q_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_token_to_image.k_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_token_to_image.v_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_token_to_image.out_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.norm2
+model.grounding_encoder.mask_decoder.transformer.layers.1.mlp
+model.grounding_encoder.mask_decoder.transformer.layers.1.mlp.lin1
+model.grounding_encoder.mask_decoder.transformer.layers.1.mlp.lin2
+model.grounding_encoder.mask_decoder.transformer.layers.1.mlp.act
+model.grounding_encoder.mask_decoder.transformer.layers.1.norm3
+model.grounding_encoder.mask_decoder.transformer.layers.1.norm4
+model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_image_to_token
+model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_image_to_token.q_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_image_to_token.k_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_image_to_token.v_proj
+model.grounding_encoder.mask_decoder.transformer.layers.1.cross_attn_image_to_token.out_proj
+model.grounding_encoder.mask_decoder.transformer.final_attn_token_to_image
+model.grounding_encoder.mask_decoder.transformer.final_attn_token_to_image.q_proj
+model.grounding_encoder.mask_decoder.transformer.final_attn_token_to_image.k_proj
+model.grounding_encoder.mask_decoder.transformer.final_attn_token_to_image.v_proj
+model.grounding_encoder.mask_decoder.transformer.final_attn_token_to_image.out_proj
+model.grounding_encoder.mask_decoder.transformer.norm_final_attn
+model.grounding_encoder.mask_decoder.iou_token
+model.grounding_encoder.mask_decoder.mask_tokens
+model.grounding_encoder.mask_decoder.output_upscaling
+model.grounding_encoder.mask_decoder.output_upscaling.0
+model.grounding_encoder.mask_decoder.output_upscaling.1
+model.grounding_encoder.mask_decoder.output_upscaling.2
+model.grounding_encoder.mask_decoder.output_upscaling.3
+model.grounding_encoder.mask_decoder.output_upscaling.4
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.0
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.0.layers
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.0.layers.0
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.0.layers.1
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.0.layers.2
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.1
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.1.layers
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.1.layers.0
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.1.layers.1
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.1.layers.2
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.2
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.2.layers
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.2.layers.0
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.2.layers.1
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.2.layers.2
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.3
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.3.layers
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.3.layers.0
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.3.layers.1
+model.grounding_encoder.mask_decoder.output_hypernetworks_mlps.3.layers.2
+model.grounding_encoder.mask_decoder.iou_prediction_head
+model.grounding_encoder.mask_decoder.iou_prediction_head.layers
+model.grounding_encoder.mask_decoder.iou_prediction_head.layers.0
+model.grounding_encoder.mask_decoder.iou_prediction_head.layers.1
+model.grounding_encoder.mask_decoder.iou_prediction_head.layers.2
+model.text_hidden_fcs
+model.text_hidden_fcs.0
+model.text_hidden_fcs.0.0
+model.text_hidden_fcs.0.1
+model.text_hidden_fcs.0.2
+model.text_hidden_fcs.0.3
+lm_head
diff --git a/logs/Qwen/Qwen2-VL-2B-Instruct.txt b/logs/Qwen/Qwen2-VL-2B-Instruct.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0f5942e02b92a279abd57e6f1e1bde8fff7bb86a
--- /dev/null
+++ b/logs/Qwen/Qwen2-VL-2B-Instruct.txt
@@ -0,0 +1,730 @@
+
+visual
+visual.patch_embed
+visual.patch_embed.proj
+visual.rotary_pos_emb
+visual.blocks
+visual.blocks.0
+visual.blocks.0.norm1
+visual.blocks.0.norm2
+visual.blocks.0.attn
+visual.blocks.0.attn.qkv
+visual.blocks.0.attn.proj
+visual.blocks.0.mlp
+visual.blocks.0.mlp.fc1
+visual.blocks.0.mlp.act
+visual.blocks.0.mlp.fc2
+visual.blocks.1
+visual.blocks.1.norm1
+visual.blocks.1.norm2
+visual.blocks.1.attn
+visual.blocks.1.attn.qkv
+visual.blocks.1.attn.proj
+visual.blocks.1.mlp
+visual.blocks.1.mlp.fc1
+visual.blocks.1.mlp.act
+visual.blocks.1.mlp.fc2
+visual.blocks.2
+visual.blocks.2.norm1
+visual.blocks.2.norm2
+visual.blocks.2.attn
+visual.blocks.2.attn.qkv
+visual.blocks.2.attn.proj
+visual.blocks.2.mlp
+visual.blocks.2.mlp.fc1
+visual.blocks.2.mlp.act
+visual.blocks.2.mlp.fc2
+visual.blocks.3
+visual.blocks.3.norm1
+visual.blocks.3.norm2
+visual.blocks.3.attn
+visual.blocks.3.attn.qkv
+visual.blocks.3.attn.proj
+visual.blocks.3.mlp
+visual.blocks.3.mlp.fc1
+visual.blocks.3.mlp.act
+visual.blocks.3.mlp.fc2
+visual.blocks.4
+visual.blocks.4.norm1
+visual.blocks.4.norm2
+visual.blocks.4.attn
+visual.blocks.4.attn.qkv
+visual.blocks.4.attn.proj
+visual.blocks.4.mlp
+visual.blocks.4.mlp.fc1
+visual.blocks.4.mlp.act
+visual.blocks.4.mlp.fc2
+visual.blocks.5
+visual.blocks.5.norm1
+visual.blocks.5.norm2
+visual.blocks.5.attn
+visual.blocks.5.attn.qkv
+visual.blocks.5.attn.proj
+visual.blocks.5.mlp
+visual.blocks.5.mlp.fc1
+visual.blocks.5.mlp.act
+visual.blocks.5.mlp.fc2
+visual.blocks.6
+visual.blocks.6.norm1
+visual.blocks.6.norm2
+visual.blocks.6.attn
+visual.blocks.6.attn.qkv
+visual.blocks.6.attn.proj
+visual.blocks.6.mlp
+visual.blocks.6.mlp.fc1
+visual.blocks.6.mlp.act
+visual.blocks.6.mlp.fc2
+visual.blocks.7
+visual.blocks.7.norm1
+visual.blocks.7.norm2
+visual.blocks.7.attn
+visual.blocks.7.attn.qkv
+visual.blocks.7.attn.proj
+visual.blocks.7.mlp
+visual.blocks.7.mlp.fc1
+visual.blocks.7.mlp.act
+visual.blocks.7.mlp.fc2
+visual.blocks.8
+visual.blocks.8.norm1
+visual.blocks.8.norm2
+visual.blocks.8.attn
+visual.blocks.8.attn.qkv
+visual.blocks.8.attn.proj
+visual.blocks.8.mlp
+visual.blocks.8.mlp.fc1
+visual.blocks.8.mlp.act
+visual.blocks.8.mlp.fc2
+visual.blocks.9
+visual.blocks.9.norm1
+visual.blocks.9.norm2
+visual.blocks.9.attn
+visual.blocks.9.attn.qkv
+visual.blocks.9.attn.proj
+visual.blocks.9.mlp
+visual.blocks.9.mlp.fc1
+visual.blocks.9.mlp.act
+visual.blocks.9.mlp.fc2
+visual.blocks.10
+visual.blocks.10.norm1
+visual.blocks.10.norm2
+visual.blocks.10.attn
+visual.blocks.10.attn.qkv
+visual.blocks.10.attn.proj
+visual.blocks.10.mlp
+visual.blocks.10.mlp.fc1
+visual.blocks.10.mlp.act
+visual.blocks.10.mlp.fc2
+visual.blocks.11
+visual.blocks.11.norm1
+visual.blocks.11.norm2
+visual.blocks.11.attn
+visual.blocks.11.attn.qkv
+visual.blocks.11.attn.proj
+visual.blocks.11.mlp
+visual.blocks.11.mlp.fc1
+visual.blocks.11.mlp.act
+visual.blocks.11.mlp.fc2
+visual.blocks.12
+visual.blocks.12.norm1
+visual.blocks.12.norm2
+visual.blocks.12.attn
+visual.blocks.12.attn.qkv
+visual.blocks.12.attn.proj
+visual.blocks.12.mlp
+visual.blocks.12.mlp.fc1
+visual.blocks.12.mlp.act
+visual.blocks.12.mlp.fc2
+visual.blocks.13
+visual.blocks.13.norm1
+visual.blocks.13.norm2
+visual.blocks.13.attn
+visual.blocks.13.attn.qkv
+visual.blocks.13.attn.proj
+visual.blocks.13.mlp
+visual.blocks.13.mlp.fc1
+visual.blocks.13.mlp.act
+visual.blocks.13.mlp.fc2
+visual.blocks.14
+visual.blocks.14.norm1
+visual.blocks.14.norm2
+visual.blocks.14.attn
+visual.blocks.14.attn.qkv
+visual.blocks.14.attn.proj
+visual.blocks.14.mlp
+visual.blocks.14.mlp.fc1
+visual.blocks.14.mlp.act
+visual.blocks.14.mlp.fc2
+visual.blocks.15
+visual.blocks.15.norm1
+visual.blocks.15.norm2
+visual.blocks.15.attn
+visual.blocks.15.attn.qkv
+visual.blocks.15.attn.proj
+visual.blocks.15.mlp
+visual.blocks.15.mlp.fc1
+visual.blocks.15.mlp.act
+visual.blocks.15.mlp.fc2
+visual.blocks.16
+visual.blocks.16.norm1
+visual.blocks.16.norm2
+visual.blocks.16.attn
+visual.blocks.16.attn.qkv
+visual.blocks.16.attn.proj
+visual.blocks.16.mlp
+visual.blocks.16.mlp.fc1
+visual.blocks.16.mlp.act
+visual.blocks.16.mlp.fc2
+visual.blocks.17
+visual.blocks.17.norm1
+visual.blocks.17.norm2
+visual.blocks.17.attn
+visual.blocks.17.attn.qkv
+visual.blocks.17.attn.proj
+visual.blocks.17.mlp
+visual.blocks.17.mlp.fc1
+visual.blocks.17.mlp.act
+visual.blocks.17.mlp.fc2
+visual.blocks.18
+visual.blocks.18.norm1
+visual.blocks.18.norm2
+visual.blocks.18.attn
+visual.blocks.18.attn.qkv
+visual.blocks.18.attn.proj
+visual.blocks.18.mlp
+visual.blocks.18.mlp.fc1
+visual.blocks.18.mlp.act
+visual.blocks.18.mlp.fc2
+visual.blocks.19
+visual.blocks.19.norm1
+visual.blocks.19.norm2
+visual.blocks.19.attn
+visual.blocks.19.attn.qkv
+visual.blocks.19.attn.proj
+visual.blocks.19.mlp
+visual.blocks.19.mlp.fc1
+visual.blocks.19.mlp.act
+visual.blocks.19.mlp.fc2
+visual.blocks.20
+visual.blocks.20.norm1
+visual.blocks.20.norm2
+visual.blocks.20.attn
+visual.blocks.20.attn.qkv
+visual.blocks.20.attn.proj
+visual.blocks.20.mlp
+visual.blocks.20.mlp.fc1
+visual.blocks.20.mlp.act
+visual.blocks.20.mlp.fc2
+visual.blocks.21
+visual.blocks.21.norm1
+visual.blocks.21.norm2
+visual.blocks.21.attn
+visual.blocks.21.attn.qkv
+visual.blocks.21.attn.proj
+visual.blocks.21.mlp
+visual.blocks.21.mlp.fc1
+visual.blocks.21.mlp.act
+visual.blocks.21.mlp.fc2
+visual.blocks.22
+visual.blocks.22.norm1
+visual.blocks.22.norm2
+visual.blocks.22.attn
+visual.blocks.22.attn.qkv
+visual.blocks.22.attn.proj
+visual.blocks.22.mlp
+visual.blocks.22.mlp.fc1
+visual.blocks.22.mlp.act
+visual.blocks.22.mlp.fc2
+visual.blocks.23
+visual.blocks.23.norm1
+visual.blocks.23.norm2
+visual.blocks.23.attn
+visual.blocks.23.attn.qkv
+visual.blocks.23.attn.proj
+visual.blocks.23.mlp
+visual.blocks.23.mlp.fc1
+visual.blocks.23.mlp.act
+visual.blocks.23.mlp.fc2
+visual.blocks.24
+visual.blocks.24.norm1
+visual.blocks.24.norm2
+visual.blocks.24.attn
+visual.blocks.24.attn.qkv
+visual.blocks.24.attn.proj
+visual.blocks.24.mlp
+visual.blocks.24.mlp.fc1
+visual.blocks.24.mlp.act
+visual.blocks.24.mlp.fc2
+visual.blocks.25
+visual.blocks.25.norm1
+visual.blocks.25.norm2
+visual.blocks.25.attn
+visual.blocks.25.attn.qkv
+visual.blocks.25.attn.proj
+visual.blocks.25.mlp
+visual.blocks.25.mlp.fc1
+visual.blocks.25.mlp.act
+visual.blocks.25.mlp.fc2
+visual.blocks.26
+visual.blocks.26.norm1
+visual.blocks.26.norm2
+visual.blocks.26.attn
+visual.blocks.26.attn.qkv
+visual.blocks.26.attn.proj
+visual.blocks.26.mlp
+visual.blocks.26.mlp.fc1
+visual.blocks.26.mlp.act
+visual.blocks.26.mlp.fc2
+visual.blocks.27
+visual.blocks.27.norm1
+visual.blocks.27.norm2
+visual.blocks.27.attn
+visual.blocks.27.attn.qkv
+visual.blocks.27.attn.proj
+visual.blocks.27.mlp
+visual.blocks.27.mlp.fc1
+visual.blocks.27.mlp.act
+visual.blocks.27.mlp.fc2
+visual.blocks.28
+visual.blocks.28.norm1
+visual.blocks.28.norm2
+visual.blocks.28.attn
+visual.blocks.28.attn.qkv
+visual.blocks.28.attn.proj
+visual.blocks.28.mlp
+visual.blocks.28.mlp.fc1
+visual.blocks.28.mlp.act
+visual.blocks.28.mlp.fc2
+visual.blocks.29
+visual.blocks.29.norm1
+visual.blocks.29.norm2
+visual.blocks.29.attn
+visual.blocks.29.attn.qkv
+visual.blocks.29.attn.proj
+visual.blocks.29.mlp
+visual.blocks.29.mlp.fc1
+visual.blocks.29.mlp.act
+visual.blocks.29.mlp.fc2
+visual.blocks.30
+visual.blocks.30.norm1
+visual.blocks.30.norm2
+visual.blocks.30.attn
+visual.blocks.30.attn.qkv
+visual.blocks.30.attn.proj
+visual.blocks.30.mlp
+visual.blocks.30.mlp.fc1
+visual.blocks.30.mlp.act
+visual.blocks.30.mlp.fc2
+visual.blocks.31
+visual.blocks.31.norm1
+visual.blocks.31.norm2
+visual.blocks.31.attn
+visual.blocks.31.attn.qkv
+visual.blocks.31.attn.proj
+visual.blocks.31.mlp
+visual.blocks.31.mlp.fc1
+visual.blocks.31.mlp.act
+visual.blocks.31.mlp.fc2
+visual.merger
+visual.merger.ln_q
+visual.merger.mlp
+visual.merger.mlp.0
+visual.merger.mlp.1
+visual.merger.mlp.2
+model
+model.embed_tokens
+model.layers
+model.layers.0
+model.layers.0.self_attn
+model.layers.0.self_attn.q_proj
+model.layers.0.self_attn.k_proj
+model.layers.0.self_attn.v_proj
+model.layers.0.self_attn.o_proj
+model.layers.0.self_attn.rotary_emb
+model.layers.0.mlp
+model.layers.0.mlp.gate_proj
+model.layers.0.mlp.up_proj
+model.layers.0.mlp.down_proj
+model.layers.0.mlp.act_fn
+model.layers.0.input_layernorm
+model.layers.0.post_attention_layernorm
+model.layers.1
+model.layers.1.self_attn
+model.layers.1.self_attn.q_proj
+model.layers.1.self_attn.k_proj
+model.layers.1.self_attn.v_proj
+model.layers.1.self_attn.o_proj
+model.layers.1.self_attn.rotary_emb
+model.layers.1.mlp
+model.layers.1.mlp.gate_proj
+model.layers.1.mlp.up_proj
+model.layers.1.mlp.down_proj
+model.layers.1.mlp.act_fn
+model.layers.1.input_layernorm
+model.layers.1.post_attention_layernorm
+model.layers.2
+model.layers.2.self_attn
+model.layers.2.self_attn.q_proj
+model.layers.2.self_attn.k_proj
+model.layers.2.self_attn.v_proj
+model.layers.2.self_attn.o_proj
+model.layers.2.self_attn.rotary_emb
+model.layers.2.mlp
+model.layers.2.mlp.gate_proj
+model.layers.2.mlp.up_proj
+model.layers.2.mlp.down_proj
+model.layers.2.mlp.act_fn
+model.layers.2.input_layernorm
+model.layers.2.post_attention_layernorm
+model.layers.3
+model.layers.3.self_attn
+model.layers.3.self_attn.q_proj
+model.layers.3.self_attn.k_proj
+model.layers.3.self_attn.v_proj
+model.layers.3.self_attn.o_proj
+model.layers.3.self_attn.rotary_emb
+model.layers.3.mlp
+model.layers.3.mlp.gate_proj
+model.layers.3.mlp.up_proj
+model.layers.3.mlp.down_proj
+model.layers.3.mlp.act_fn
+model.layers.3.input_layernorm
+model.layers.3.post_attention_layernorm
+model.layers.4
+model.layers.4.self_attn
+model.layers.4.self_attn.q_proj
+model.layers.4.self_attn.k_proj
+model.layers.4.self_attn.v_proj
+model.layers.4.self_attn.o_proj
+model.layers.4.self_attn.rotary_emb
+model.layers.4.mlp
+model.layers.4.mlp.gate_proj
+model.layers.4.mlp.up_proj
+model.layers.4.mlp.down_proj
+model.layers.4.mlp.act_fn
+model.layers.4.input_layernorm
+model.layers.4.post_attention_layernorm
+model.layers.5
+model.layers.5.self_attn
+model.layers.5.self_attn.q_proj
+model.layers.5.self_attn.k_proj
+model.layers.5.self_attn.v_proj
+model.layers.5.self_attn.o_proj
+model.layers.5.self_attn.rotary_emb
+model.layers.5.mlp
+model.layers.5.mlp.gate_proj
+model.layers.5.mlp.up_proj
+model.layers.5.mlp.down_proj
+model.layers.5.mlp.act_fn
+model.layers.5.input_layernorm
+model.layers.5.post_attention_layernorm
+model.layers.6
+model.layers.6.self_attn
+model.layers.6.self_attn.q_proj
+model.layers.6.self_attn.k_proj
+model.layers.6.self_attn.v_proj
+model.layers.6.self_attn.o_proj
+model.layers.6.self_attn.rotary_emb
+model.layers.6.mlp
+model.layers.6.mlp.gate_proj
+model.layers.6.mlp.up_proj
+model.layers.6.mlp.down_proj
+model.layers.6.mlp.act_fn
+model.layers.6.input_layernorm
+model.layers.6.post_attention_layernorm
+model.layers.7
+model.layers.7.self_attn
+model.layers.7.self_attn.q_proj
+model.layers.7.self_attn.k_proj
+model.layers.7.self_attn.v_proj
+model.layers.7.self_attn.o_proj
+model.layers.7.self_attn.rotary_emb
+model.layers.7.mlp
+model.layers.7.mlp.gate_proj
+model.layers.7.mlp.up_proj
+model.layers.7.mlp.down_proj
+model.layers.7.mlp.act_fn
+model.layers.7.input_layernorm
+model.layers.7.post_attention_layernorm
+model.layers.8
+model.layers.8.self_attn
+model.layers.8.self_attn.q_proj
+model.layers.8.self_attn.k_proj
+model.layers.8.self_attn.v_proj
+model.layers.8.self_attn.o_proj
+model.layers.8.self_attn.rotary_emb
+model.layers.8.mlp
+model.layers.8.mlp.gate_proj
+model.layers.8.mlp.up_proj
+model.layers.8.mlp.down_proj
+model.layers.8.mlp.act_fn
+model.layers.8.input_layernorm
+model.layers.8.post_attention_layernorm
+model.layers.9
+model.layers.9.self_attn
+model.layers.9.self_attn.q_proj
+model.layers.9.self_attn.k_proj
+model.layers.9.self_attn.v_proj
+model.layers.9.self_attn.o_proj
+model.layers.9.self_attn.rotary_emb
+model.layers.9.mlp
+model.layers.9.mlp.gate_proj
+model.layers.9.mlp.up_proj
+model.layers.9.mlp.down_proj
+model.layers.9.mlp.act_fn
+model.layers.9.input_layernorm
+model.layers.9.post_attention_layernorm
+model.layers.10
+model.layers.10.self_attn
+model.layers.10.self_attn.q_proj
+model.layers.10.self_attn.k_proj
+model.layers.10.self_attn.v_proj
+model.layers.10.self_attn.o_proj
+model.layers.10.self_attn.rotary_emb
+model.layers.10.mlp
+model.layers.10.mlp.gate_proj
+model.layers.10.mlp.up_proj
+model.layers.10.mlp.down_proj
+model.layers.10.mlp.act_fn
+model.layers.10.input_layernorm
+model.layers.10.post_attention_layernorm
+model.layers.11
+model.layers.11.self_attn
+model.layers.11.self_attn.q_proj
+model.layers.11.self_attn.k_proj
+model.layers.11.self_attn.v_proj
+model.layers.11.self_attn.o_proj
+model.layers.11.self_attn.rotary_emb
+model.layers.11.mlp
+model.layers.11.mlp.gate_proj
+model.layers.11.mlp.up_proj
+model.layers.11.mlp.down_proj
+model.layers.11.mlp.act_fn
+model.layers.11.input_layernorm
+model.layers.11.post_attention_layernorm
+model.layers.12
+model.layers.12.self_attn
+model.layers.12.self_attn.q_proj
+model.layers.12.self_attn.k_proj
+model.layers.12.self_attn.v_proj
+model.layers.12.self_attn.o_proj
+model.layers.12.self_attn.rotary_emb
+model.layers.12.mlp
+model.layers.12.mlp.gate_proj
+model.layers.12.mlp.up_proj
+model.layers.12.mlp.down_proj
+model.layers.12.mlp.act_fn
+model.layers.12.input_layernorm
+model.layers.12.post_attention_layernorm
+model.layers.13
+model.layers.13.self_attn
+model.layers.13.self_attn.q_proj
+model.layers.13.self_attn.k_proj
+model.layers.13.self_attn.v_proj
+model.layers.13.self_attn.o_proj
+model.layers.13.self_attn.rotary_emb
+model.layers.13.mlp
+model.layers.13.mlp.gate_proj
+model.layers.13.mlp.up_proj
+model.layers.13.mlp.down_proj
+model.layers.13.mlp.act_fn
+model.layers.13.input_layernorm
+model.layers.13.post_attention_layernorm
+model.layers.14
+model.layers.14.self_attn
+model.layers.14.self_attn.q_proj
+model.layers.14.self_attn.k_proj
+model.layers.14.self_attn.v_proj
+model.layers.14.self_attn.o_proj
+model.layers.14.self_attn.rotary_emb
+model.layers.14.mlp
+model.layers.14.mlp.gate_proj
+model.layers.14.mlp.up_proj
+model.layers.14.mlp.down_proj
+model.layers.14.mlp.act_fn
+model.layers.14.input_layernorm
+model.layers.14.post_attention_layernorm
+model.layers.15
+model.layers.15.self_attn
+model.layers.15.self_attn.q_proj
+model.layers.15.self_attn.k_proj
+model.layers.15.self_attn.v_proj
+model.layers.15.self_attn.o_proj
+model.layers.15.self_attn.rotary_emb
+model.layers.15.mlp
+model.layers.15.mlp.gate_proj
+model.layers.15.mlp.up_proj
+model.layers.15.mlp.down_proj
+model.layers.15.mlp.act_fn
+model.layers.15.input_layernorm
+model.layers.15.post_attention_layernorm
+model.layers.16
+model.layers.16.self_attn
+model.layers.16.self_attn.q_proj
+model.layers.16.self_attn.k_proj
+model.layers.16.self_attn.v_proj
+model.layers.16.self_attn.o_proj
+model.layers.16.self_attn.rotary_emb
+model.layers.16.mlp
+model.layers.16.mlp.gate_proj
+model.layers.16.mlp.up_proj
+model.layers.16.mlp.down_proj
+model.layers.16.mlp.act_fn
+model.layers.16.input_layernorm
+model.layers.16.post_attention_layernorm
+model.layers.17
+model.layers.17.self_attn
+model.layers.17.self_attn.q_proj
+model.layers.17.self_attn.k_proj
+model.layers.17.self_attn.v_proj
+model.layers.17.self_attn.o_proj
+model.layers.17.self_attn.rotary_emb
+model.layers.17.mlp
+model.layers.17.mlp.gate_proj
+model.layers.17.mlp.up_proj
+model.layers.17.mlp.down_proj
+model.layers.17.mlp.act_fn
+model.layers.17.input_layernorm
+model.layers.17.post_attention_layernorm
+model.layers.18
+model.layers.18.self_attn
+model.layers.18.self_attn.q_proj
+model.layers.18.self_attn.k_proj
+model.layers.18.self_attn.v_proj
+model.layers.18.self_attn.o_proj
+model.layers.18.self_attn.rotary_emb
+model.layers.18.mlp
+model.layers.18.mlp.gate_proj
+model.layers.18.mlp.up_proj
+model.layers.18.mlp.down_proj
+model.layers.18.mlp.act_fn
+model.layers.18.input_layernorm
+model.layers.18.post_attention_layernorm
+model.layers.19
+model.layers.19.self_attn
+model.layers.19.self_attn.q_proj
+model.layers.19.self_attn.k_proj
+model.layers.19.self_attn.v_proj
+model.layers.19.self_attn.o_proj
+model.layers.19.self_attn.rotary_emb
+model.layers.19.mlp
+model.layers.19.mlp.gate_proj
+model.layers.19.mlp.up_proj
+model.layers.19.mlp.down_proj
+model.layers.19.mlp.act_fn
+model.layers.19.input_layernorm
+model.layers.19.post_attention_layernorm
+model.layers.20
+model.layers.20.self_attn
+model.layers.20.self_attn.q_proj
+model.layers.20.self_attn.k_proj
+model.layers.20.self_attn.v_proj
+model.layers.20.self_attn.o_proj
+model.layers.20.self_attn.rotary_emb
+model.layers.20.mlp
+model.layers.20.mlp.gate_proj
+model.layers.20.mlp.up_proj
+model.layers.20.mlp.down_proj
+model.layers.20.mlp.act_fn
+model.layers.20.input_layernorm
+model.layers.20.post_attention_layernorm
+model.layers.21
+model.layers.21.self_attn
+model.layers.21.self_attn.q_proj
+model.layers.21.self_attn.k_proj
+model.layers.21.self_attn.v_proj
+model.layers.21.self_attn.o_proj
+model.layers.21.self_attn.rotary_emb
+model.layers.21.mlp
+model.layers.21.mlp.gate_proj
+model.layers.21.mlp.up_proj
+model.layers.21.mlp.down_proj
+model.layers.21.mlp.act_fn
+model.layers.21.input_layernorm
+model.layers.21.post_attention_layernorm
+model.layers.22
+model.layers.22.self_attn
+model.layers.22.self_attn.q_proj
+model.layers.22.self_attn.k_proj
+model.layers.22.self_attn.v_proj
+model.layers.22.self_attn.o_proj
+model.layers.22.self_attn.rotary_emb
+model.layers.22.mlp
+model.layers.22.mlp.gate_proj
+model.layers.22.mlp.up_proj
+model.layers.22.mlp.down_proj
+model.layers.22.mlp.act_fn
+model.layers.22.input_layernorm
+model.layers.22.post_attention_layernorm
+model.layers.23
+model.layers.23.self_attn
+model.layers.23.self_attn.q_proj
+model.layers.23.self_attn.k_proj
+model.layers.23.self_attn.v_proj
+model.layers.23.self_attn.o_proj
+model.layers.23.self_attn.rotary_emb
+model.layers.23.mlp
+model.layers.23.mlp.gate_proj
+model.layers.23.mlp.up_proj
+model.layers.23.mlp.down_proj
+model.layers.23.mlp.act_fn
+model.layers.23.input_layernorm
+model.layers.23.post_attention_layernorm
+model.layers.24
+model.layers.24.self_attn
+model.layers.24.self_attn.q_proj
+model.layers.24.self_attn.k_proj
+model.layers.24.self_attn.v_proj
+model.layers.24.self_attn.o_proj
+model.layers.24.self_attn.rotary_emb
+model.layers.24.mlp
+model.layers.24.mlp.gate_proj
+model.layers.24.mlp.up_proj
+model.layers.24.mlp.down_proj
+model.layers.24.mlp.act_fn
+model.layers.24.input_layernorm
+model.layers.24.post_attention_layernorm
+model.layers.25
+model.layers.25.self_attn
+model.layers.25.self_attn.q_proj
+model.layers.25.self_attn.k_proj
+model.layers.25.self_attn.v_proj
+model.layers.25.self_attn.o_proj
+model.layers.25.self_attn.rotary_emb
+model.layers.25.mlp
+model.layers.25.mlp.gate_proj
+model.layers.25.mlp.up_proj
+model.layers.25.mlp.down_proj
+model.layers.25.mlp.act_fn
+model.layers.25.input_layernorm
+model.layers.25.post_attention_layernorm
+model.layers.26
+model.layers.26.self_attn
+model.layers.26.self_attn.q_proj
+model.layers.26.self_attn.k_proj
+model.layers.26.self_attn.v_proj
+model.layers.26.self_attn.o_proj
+model.layers.26.self_attn.rotary_emb
+model.layers.26.mlp
+model.layers.26.mlp.gate_proj
+model.layers.26.mlp.up_proj
+model.layers.26.mlp.down_proj
+model.layers.26.mlp.act_fn
+model.layers.26.input_layernorm
+model.layers.26.post_attention_layernorm
+model.layers.27
+model.layers.27.self_attn
+model.layers.27.self_attn.q_proj
+model.layers.27.self_attn.k_proj
+model.layers.27.self_attn.v_proj
+model.layers.27.self_attn.o_proj
+model.layers.27.self_attn.rotary_emb
+model.layers.27.mlp
+model.layers.27.mlp.gate_proj
+model.layers.27.mlp.up_proj
+model.layers.27.mlp.down_proj
+model.layers.27.mlp.act_fn
+model.layers.27.input_layernorm
+model.layers.27.post_attention_layernorm
+model.norm
+model.rotary_emb
+lm_head
diff --git a/logs/Qwen/Qwen2-VL-7B-Instruct.txt b/logs/Qwen/Qwen2-VL-7B-Instruct.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0f5942e02b92a279abd57e6f1e1bde8fff7bb86a
--- /dev/null
+++ b/logs/Qwen/Qwen2-VL-7B-Instruct.txt
@@ -0,0 +1,730 @@
+
+visual
+visual.patch_embed
+visual.patch_embed.proj
+visual.rotary_pos_emb
+visual.blocks
+visual.blocks.0
+visual.blocks.0.norm1
+visual.blocks.0.norm2
+visual.blocks.0.attn
+visual.blocks.0.attn.qkv
+visual.blocks.0.attn.proj
+visual.blocks.0.mlp
+visual.blocks.0.mlp.fc1
+visual.blocks.0.mlp.act
+visual.blocks.0.mlp.fc2
+visual.blocks.1
+visual.blocks.1.norm1
+visual.blocks.1.norm2
+visual.blocks.1.attn
+visual.blocks.1.attn.qkv
+visual.blocks.1.attn.proj
+visual.blocks.1.mlp
+visual.blocks.1.mlp.fc1
+visual.blocks.1.mlp.act
+visual.blocks.1.mlp.fc2
+visual.blocks.2
+visual.blocks.2.norm1
+visual.blocks.2.norm2
+visual.blocks.2.attn
+visual.blocks.2.attn.qkv
+visual.blocks.2.attn.proj
+visual.blocks.2.mlp
+visual.blocks.2.mlp.fc1
+visual.blocks.2.mlp.act
+visual.blocks.2.mlp.fc2
+visual.blocks.3
+visual.blocks.3.norm1
+visual.blocks.3.norm2
+visual.blocks.3.attn
+visual.blocks.3.attn.qkv
+visual.blocks.3.attn.proj
+visual.blocks.3.mlp
+visual.blocks.3.mlp.fc1
+visual.blocks.3.mlp.act
+visual.blocks.3.mlp.fc2
+visual.blocks.4
+visual.blocks.4.norm1
+visual.blocks.4.norm2
+visual.blocks.4.attn
+visual.blocks.4.attn.qkv
+visual.blocks.4.attn.proj
+visual.blocks.4.mlp
+visual.blocks.4.mlp.fc1
+visual.blocks.4.mlp.act
+visual.blocks.4.mlp.fc2
+visual.blocks.5
+visual.blocks.5.norm1
+visual.blocks.5.norm2
+visual.blocks.5.attn
+visual.blocks.5.attn.qkv
+visual.blocks.5.attn.proj
+visual.blocks.5.mlp
+visual.blocks.5.mlp.fc1
+visual.blocks.5.mlp.act
+visual.blocks.5.mlp.fc2
+visual.blocks.6
+visual.blocks.6.norm1
+visual.blocks.6.norm2
+visual.blocks.6.attn
+visual.blocks.6.attn.qkv
+visual.blocks.6.attn.proj
+visual.blocks.6.mlp
+visual.blocks.6.mlp.fc1
+visual.blocks.6.mlp.act
+visual.blocks.6.mlp.fc2
+visual.blocks.7
+visual.blocks.7.norm1
+visual.blocks.7.norm2
+visual.blocks.7.attn
+visual.blocks.7.attn.qkv
+visual.blocks.7.attn.proj
+visual.blocks.7.mlp
+visual.blocks.7.mlp.fc1
+visual.blocks.7.mlp.act
+visual.blocks.7.mlp.fc2
+visual.blocks.8
+visual.blocks.8.norm1
+visual.blocks.8.norm2
+visual.blocks.8.attn
+visual.blocks.8.attn.qkv
+visual.blocks.8.attn.proj
+visual.blocks.8.mlp
+visual.blocks.8.mlp.fc1
+visual.blocks.8.mlp.act
+visual.blocks.8.mlp.fc2
+visual.blocks.9
+visual.blocks.9.norm1
+visual.blocks.9.norm2
+visual.blocks.9.attn
+visual.blocks.9.attn.qkv
+visual.blocks.9.attn.proj
+visual.blocks.9.mlp
+visual.blocks.9.mlp.fc1
+visual.blocks.9.mlp.act
+visual.blocks.9.mlp.fc2
+visual.blocks.10
+visual.blocks.10.norm1
+visual.blocks.10.norm2
+visual.blocks.10.attn
+visual.blocks.10.attn.qkv
+visual.blocks.10.attn.proj
+visual.blocks.10.mlp
+visual.blocks.10.mlp.fc1
+visual.blocks.10.mlp.act
+visual.blocks.10.mlp.fc2
+visual.blocks.11
+visual.blocks.11.norm1
+visual.blocks.11.norm2
+visual.blocks.11.attn
+visual.blocks.11.attn.qkv
+visual.blocks.11.attn.proj
+visual.blocks.11.mlp
+visual.blocks.11.mlp.fc1
+visual.blocks.11.mlp.act
+visual.blocks.11.mlp.fc2
+visual.blocks.12
+visual.blocks.12.norm1
+visual.blocks.12.norm2
+visual.blocks.12.attn
+visual.blocks.12.attn.qkv
+visual.blocks.12.attn.proj
+visual.blocks.12.mlp
+visual.blocks.12.mlp.fc1
+visual.blocks.12.mlp.act
+visual.blocks.12.mlp.fc2
+visual.blocks.13
+visual.blocks.13.norm1
+visual.blocks.13.norm2
+visual.blocks.13.attn
+visual.blocks.13.attn.qkv
+visual.blocks.13.attn.proj
+visual.blocks.13.mlp
+visual.blocks.13.mlp.fc1
+visual.blocks.13.mlp.act
+visual.blocks.13.mlp.fc2
+visual.blocks.14
+visual.blocks.14.norm1
+visual.blocks.14.norm2
+visual.blocks.14.attn
+visual.blocks.14.attn.qkv
+visual.blocks.14.attn.proj
+visual.blocks.14.mlp
+visual.blocks.14.mlp.fc1
+visual.blocks.14.mlp.act
+visual.blocks.14.mlp.fc2
+visual.blocks.15
+visual.blocks.15.norm1
+visual.blocks.15.norm2
+visual.blocks.15.attn
+visual.blocks.15.attn.qkv
+visual.blocks.15.attn.proj
+visual.blocks.15.mlp
+visual.blocks.15.mlp.fc1
+visual.blocks.15.mlp.act
+visual.blocks.15.mlp.fc2
+visual.blocks.16
+visual.blocks.16.norm1
+visual.blocks.16.norm2
+visual.blocks.16.attn
+visual.blocks.16.attn.qkv
+visual.blocks.16.attn.proj
+visual.blocks.16.mlp
+visual.blocks.16.mlp.fc1
+visual.blocks.16.mlp.act
+visual.blocks.16.mlp.fc2
+visual.blocks.17
+visual.blocks.17.norm1
+visual.blocks.17.norm2
+visual.blocks.17.attn
+visual.blocks.17.attn.qkv
+visual.blocks.17.attn.proj
+visual.blocks.17.mlp
+visual.blocks.17.mlp.fc1
+visual.blocks.17.mlp.act
+visual.blocks.17.mlp.fc2
+visual.blocks.18
+visual.blocks.18.norm1
+visual.blocks.18.norm2
+visual.blocks.18.attn
+visual.blocks.18.attn.qkv
+visual.blocks.18.attn.proj
+visual.blocks.18.mlp
+visual.blocks.18.mlp.fc1
+visual.blocks.18.mlp.act
+visual.blocks.18.mlp.fc2
+visual.blocks.19
+visual.blocks.19.norm1
+visual.blocks.19.norm2
+visual.blocks.19.attn
+visual.blocks.19.attn.qkv
+visual.blocks.19.attn.proj
+visual.blocks.19.mlp
+visual.blocks.19.mlp.fc1
+visual.blocks.19.mlp.act
+visual.blocks.19.mlp.fc2
+visual.blocks.20
+visual.blocks.20.norm1
+visual.blocks.20.norm2
+visual.blocks.20.attn
+visual.blocks.20.attn.qkv
+visual.blocks.20.attn.proj
+visual.blocks.20.mlp
+visual.blocks.20.mlp.fc1
+visual.blocks.20.mlp.act
+visual.blocks.20.mlp.fc2
+visual.blocks.21
+visual.blocks.21.norm1
+visual.blocks.21.norm2
+visual.blocks.21.attn
+visual.blocks.21.attn.qkv
+visual.blocks.21.attn.proj
+visual.blocks.21.mlp
+visual.blocks.21.mlp.fc1
+visual.blocks.21.mlp.act
+visual.blocks.21.mlp.fc2
+visual.blocks.22
+visual.blocks.22.norm1
+visual.blocks.22.norm2
+visual.blocks.22.attn
+visual.blocks.22.attn.qkv
+visual.blocks.22.attn.proj
+visual.blocks.22.mlp
+visual.blocks.22.mlp.fc1
+visual.blocks.22.mlp.act
+visual.blocks.22.mlp.fc2
+visual.blocks.23
+visual.blocks.23.norm1
+visual.blocks.23.norm2
+visual.blocks.23.attn
+visual.blocks.23.attn.qkv
+visual.blocks.23.attn.proj
+visual.blocks.23.mlp
+visual.blocks.23.mlp.fc1
+visual.blocks.23.mlp.act
+visual.blocks.23.mlp.fc2
+visual.blocks.24
+visual.blocks.24.norm1
+visual.blocks.24.norm2
+visual.blocks.24.attn
+visual.blocks.24.attn.qkv
+visual.blocks.24.attn.proj
+visual.blocks.24.mlp
+visual.blocks.24.mlp.fc1
+visual.blocks.24.mlp.act
+visual.blocks.24.mlp.fc2
+visual.blocks.25
+visual.blocks.25.norm1
+visual.blocks.25.norm2
+visual.blocks.25.attn
+visual.blocks.25.attn.qkv
+visual.blocks.25.attn.proj
+visual.blocks.25.mlp
+visual.blocks.25.mlp.fc1
+visual.blocks.25.mlp.act
+visual.blocks.25.mlp.fc2
+visual.blocks.26
+visual.blocks.26.norm1
+visual.blocks.26.norm2
+visual.blocks.26.attn
+visual.blocks.26.attn.qkv
+visual.blocks.26.attn.proj
+visual.blocks.26.mlp
+visual.blocks.26.mlp.fc1
+visual.blocks.26.mlp.act
+visual.blocks.26.mlp.fc2
+visual.blocks.27
+visual.blocks.27.norm1
+visual.blocks.27.norm2
+visual.blocks.27.attn
+visual.blocks.27.attn.qkv
+visual.blocks.27.attn.proj
+visual.blocks.27.mlp
+visual.blocks.27.mlp.fc1
+visual.blocks.27.mlp.act
+visual.blocks.27.mlp.fc2
+visual.blocks.28
+visual.blocks.28.norm1
+visual.blocks.28.norm2
+visual.blocks.28.attn
+visual.blocks.28.attn.qkv
+visual.blocks.28.attn.proj
+visual.blocks.28.mlp
+visual.blocks.28.mlp.fc1
+visual.blocks.28.mlp.act
+visual.blocks.28.mlp.fc2
+visual.blocks.29
+visual.blocks.29.norm1
+visual.blocks.29.norm2
+visual.blocks.29.attn
+visual.blocks.29.attn.qkv
+visual.blocks.29.attn.proj
+visual.blocks.29.mlp
+visual.blocks.29.mlp.fc1
+visual.blocks.29.mlp.act
+visual.blocks.29.mlp.fc2
+visual.blocks.30
+visual.blocks.30.norm1
+visual.blocks.30.norm2
+visual.blocks.30.attn
+visual.blocks.30.attn.qkv
+visual.blocks.30.attn.proj
+visual.blocks.30.mlp
+visual.blocks.30.mlp.fc1
+visual.blocks.30.mlp.act
+visual.blocks.30.mlp.fc2
+visual.blocks.31
+visual.blocks.31.norm1
+visual.blocks.31.norm2
+visual.blocks.31.attn
+visual.blocks.31.attn.qkv
+visual.blocks.31.attn.proj
+visual.blocks.31.mlp
+visual.blocks.31.mlp.fc1
+visual.blocks.31.mlp.act
+visual.blocks.31.mlp.fc2
+visual.merger
+visual.merger.ln_q
+visual.merger.mlp
+visual.merger.mlp.0
+visual.merger.mlp.1
+visual.merger.mlp.2
+model
+model.embed_tokens
+model.layers
+model.layers.0
+model.layers.0.self_attn
+model.layers.0.self_attn.q_proj
+model.layers.0.self_attn.k_proj
+model.layers.0.self_attn.v_proj
+model.layers.0.self_attn.o_proj
+model.layers.0.self_attn.rotary_emb
+model.layers.0.mlp
+model.layers.0.mlp.gate_proj
+model.layers.0.mlp.up_proj
+model.layers.0.mlp.down_proj
+model.layers.0.mlp.act_fn
+model.layers.0.input_layernorm
+model.layers.0.post_attention_layernorm
+model.layers.1
+model.layers.1.self_attn
+model.layers.1.self_attn.q_proj
+model.layers.1.self_attn.k_proj
+model.layers.1.self_attn.v_proj
+model.layers.1.self_attn.o_proj
+model.layers.1.self_attn.rotary_emb
+model.layers.1.mlp
+model.layers.1.mlp.gate_proj
+model.layers.1.mlp.up_proj
+model.layers.1.mlp.down_proj
+model.layers.1.mlp.act_fn
+model.layers.1.input_layernorm
+model.layers.1.post_attention_layernorm
+model.layers.2
+model.layers.2.self_attn
+model.layers.2.self_attn.q_proj
+model.layers.2.self_attn.k_proj
+model.layers.2.self_attn.v_proj
+model.layers.2.self_attn.o_proj
+model.layers.2.self_attn.rotary_emb
+model.layers.2.mlp
+model.layers.2.mlp.gate_proj
+model.layers.2.mlp.up_proj
+model.layers.2.mlp.down_proj
+model.layers.2.mlp.act_fn
+model.layers.2.input_layernorm
+model.layers.2.post_attention_layernorm
+model.layers.3
+model.layers.3.self_attn
+model.layers.3.self_attn.q_proj
+model.layers.3.self_attn.k_proj
+model.layers.3.self_attn.v_proj
+model.layers.3.self_attn.o_proj
+model.layers.3.self_attn.rotary_emb
+model.layers.3.mlp
+model.layers.3.mlp.gate_proj
+model.layers.3.mlp.up_proj
+model.layers.3.mlp.down_proj
+model.layers.3.mlp.act_fn
+model.layers.3.input_layernorm
+model.layers.3.post_attention_layernorm
+model.layers.4
+model.layers.4.self_attn
+model.layers.4.self_attn.q_proj
+model.layers.4.self_attn.k_proj
+model.layers.4.self_attn.v_proj
+model.layers.4.self_attn.o_proj
+model.layers.4.self_attn.rotary_emb
+model.layers.4.mlp
+model.layers.4.mlp.gate_proj
+model.layers.4.mlp.up_proj
+model.layers.4.mlp.down_proj
+model.layers.4.mlp.act_fn
+model.layers.4.input_layernorm
+model.layers.4.post_attention_layernorm
+model.layers.5
+model.layers.5.self_attn
+model.layers.5.self_attn.q_proj
+model.layers.5.self_attn.k_proj
+model.layers.5.self_attn.v_proj
+model.layers.5.self_attn.o_proj
+model.layers.5.self_attn.rotary_emb
+model.layers.5.mlp
+model.layers.5.mlp.gate_proj
+model.layers.5.mlp.up_proj
+model.layers.5.mlp.down_proj
+model.layers.5.mlp.act_fn
+model.layers.5.input_layernorm
+model.layers.5.post_attention_layernorm
+model.layers.6
+model.layers.6.self_attn
+model.layers.6.self_attn.q_proj
+model.layers.6.self_attn.k_proj
+model.layers.6.self_attn.v_proj
+model.layers.6.self_attn.o_proj
+model.layers.6.self_attn.rotary_emb
+model.layers.6.mlp
+model.layers.6.mlp.gate_proj
+model.layers.6.mlp.up_proj
+model.layers.6.mlp.down_proj
+model.layers.6.mlp.act_fn
+model.layers.6.input_layernorm
+model.layers.6.post_attention_layernorm
+model.layers.7
+model.layers.7.self_attn
+model.layers.7.self_attn.q_proj
+model.layers.7.self_attn.k_proj
+model.layers.7.self_attn.v_proj
+model.layers.7.self_attn.o_proj
+model.layers.7.self_attn.rotary_emb
+model.layers.7.mlp
+model.layers.7.mlp.gate_proj
+model.layers.7.mlp.up_proj
+model.layers.7.mlp.down_proj
+model.layers.7.mlp.act_fn
+model.layers.7.input_layernorm
+model.layers.7.post_attention_layernorm
+model.layers.8
+model.layers.8.self_attn
+model.layers.8.self_attn.q_proj
+model.layers.8.self_attn.k_proj
+model.layers.8.self_attn.v_proj
+model.layers.8.self_attn.o_proj
+model.layers.8.self_attn.rotary_emb
+model.layers.8.mlp
+model.layers.8.mlp.gate_proj
+model.layers.8.mlp.up_proj
+model.layers.8.mlp.down_proj
+model.layers.8.mlp.act_fn
+model.layers.8.input_layernorm
+model.layers.8.post_attention_layernorm
+model.layers.9
+model.layers.9.self_attn
+model.layers.9.self_attn.q_proj
+model.layers.9.self_attn.k_proj
+model.layers.9.self_attn.v_proj
+model.layers.9.self_attn.o_proj
+model.layers.9.self_attn.rotary_emb
+model.layers.9.mlp
+model.layers.9.mlp.gate_proj
+model.layers.9.mlp.up_proj
+model.layers.9.mlp.down_proj
+model.layers.9.mlp.act_fn
+model.layers.9.input_layernorm
+model.layers.9.post_attention_layernorm
+model.layers.10
+model.layers.10.self_attn
+model.layers.10.self_attn.q_proj
+model.layers.10.self_attn.k_proj
+model.layers.10.self_attn.v_proj
+model.layers.10.self_attn.o_proj
+model.layers.10.self_attn.rotary_emb
+model.layers.10.mlp
+model.layers.10.mlp.gate_proj
+model.layers.10.mlp.up_proj
+model.layers.10.mlp.down_proj
+model.layers.10.mlp.act_fn
+model.layers.10.input_layernorm
+model.layers.10.post_attention_layernorm
+model.layers.11
+model.layers.11.self_attn
+model.layers.11.self_attn.q_proj
+model.layers.11.self_attn.k_proj
+model.layers.11.self_attn.v_proj
+model.layers.11.self_attn.o_proj
+model.layers.11.self_attn.rotary_emb
+model.layers.11.mlp
+model.layers.11.mlp.gate_proj
+model.layers.11.mlp.up_proj
+model.layers.11.mlp.down_proj
+model.layers.11.mlp.act_fn
+model.layers.11.input_layernorm
+model.layers.11.post_attention_layernorm
+model.layers.12
+model.layers.12.self_attn
+model.layers.12.self_attn.q_proj
+model.layers.12.self_attn.k_proj
+model.layers.12.self_attn.v_proj
+model.layers.12.self_attn.o_proj
+model.layers.12.self_attn.rotary_emb
+model.layers.12.mlp
+model.layers.12.mlp.gate_proj
+model.layers.12.mlp.up_proj
+model.layers.12.mlp.down_proj
+model.layers.12.mlp.act_fn
+model.layers.12.input_layernorm
+model.layers.12.post_attention_layernorm
+model.layers.13
+model.layers.13.self_attn
+model.layers.13.self_attn.q_proj
+model.layers.13.self_attn.k_proj
+model.layers.13.self_attn.v_proj
+model.layers.13.self_attn.o_proj
+model.layers.13.self_attn.rotary_emb
+model.layers.13.mlp
+model.layers.13.mlp.gate_proj
+model.layers.13.mlp.up_proj
+model.layers.13.mlp.down_proj
+model.layers.13.mlp.act_fn
+model.layers.13.input_layernorm
+model.layers.13.post_attention_layernorm
+model.layers.14
+model.layers.14.self_attn
+model.layers.14.self_attn.q_proj
+model.layers.14.self_attn.k_proj
+model.layers.14.self_attn.v_proj
+model.layers.14.self_attn.o_proj
+model.layers.14.self_attn.rotary_emb
+model.layers.14.mlp
+model.layers.14.mlp.gate_proj
+model.layers.14.mlp.up_proj
+model.layers.14.mlp.down_proj
+model.layers.14.mlp.act_fn
+model.layers.14.input_layernorm
+model.layers.14.post_attention_layernorm
+model.layers.15
+model.layers.15.self_attn
+model.layers.15.self_attn.q_proj
+model.layers.15.self_attn.k_proj
+model.layers.15.self_attn.v_proj
+model.layers.15.self_attn.o_proj
+model.layers.15.self_attn.rotary_emb
+model.layers.15.mlp
+model.layers.15.mlp.gate_proj
+model.layers.15.mlp.up_proj
+model.layers.15.mlp.down_proj
+model.layers.15.mlp.act_fn
+model.layers.15.input_layernorm
+model.layers.15.post_attention_layernorm
+model.layers.16
+model.layers.16.self_attn
+model.layers.16.self_attn.q_proj
+model.layers.16.self_attn.k_proj
+model.layers.16.self_attn.v_proj
+model.layers.16.self_attn.o_proj
+model.layers.16.self_attn.rotary_emb
+model.layers.16.mlp
+model.layers.16.mlp.gate_proj
+model.layers.16.mlp.up_proj
+model.layers.16.mlp.down_proj
+model.layers.16.mlp.act_fn
+model.layers.16.input_layernorm
+model.layers.16.post_attention_layernorm
+model.layers.17
+model.layers.17.self_attn
+model.layers.17.self_attn.q_proj
+model.layers.17.self_attn.k_proj
+model.layers.17.self_attn.v_proj
+model.layers.17.self_attn.o_proj
+model.layers.17.self_attn.rotary_emb
+model.layers.17.mlp
+model.layers.17.mlp.gate_proj
+model.layers.17.mlp.up_proj
+model.layers.17.mlp.down_proj
+model.layers.17.mlp.act_fn
+model.layers.17.input_layernorm
+model.layers.17.post_attention_layernorm
+model.layers.18
+model.layers.18.self_attn
+model.layers.18.self_attn.q_proj
+model.layers.18.self_attn.k_proj
+model.layers.18.self_attn.v_proj
+model.layers.18.self_attn.o_proj
+model.layers.18.self_attn.rotary_emb
+model.layers.18.mlp
+model.layers.18.mlp.gate_proj
+model.layers.18.mlp.up_proj
+model.layers.18.mlp.down_proj
+model.layers.18.mlp.act_fn
+model.layers.18.input_layernorm
+model.layers.18.post_attention_layernorm
+model.layers.19
+model.layers.19.self_attn
+model.layers.19.self_attn.q_proj
+model.layers.19.self_attn.k_proj
+model.layers.19.self_attn.v_proj
+model.layers.19.self_attn.o_proj
+model.layers.19.self_attn.rotary_emb
+model.layers.19.mlp
+model.layers.19.mlp.gate_proj
+model.layers.19.mlp.up_proj
+model.layers.19.mlp.down_proj
+model.layers.19.mlp.act_fn
+model.layers.19.input_layernorm
+model.layers.19.post_attention_layernorm
+model.layers.20
+model.layers.20.self_attn
+model.layers.20.self_attn.q_proj
+model.layers.20.self_attn.k_proj
+model.layers.20.self_attn.v_proj
+model.layers.20.self_attn.o_proj
+model.layers.20.self_attn.rotary_emb
+model.layers.20.mlp
+model.layers.20.mlp.gate_proj
+model.layers.20.mlp.up_proj
+model.layers.20.mlp.down_proj
+model.layers.20.mlp.act_fn
+model.layers.20.input_layernorm
+model.layers.20.post_attention_layernorm
+model.layers.21
+model.layers.21.self_attn
+model.layers.21.self_attn.q_proj
+model.layers.21.self_attn.k_proj
+model.layers.21.self_attn.v_proj
+model.layers.21.self_attn.o_proj
+model.layers.21.self_attn.rotary_emb
+model.layers.21.mlp
+model.layers.21.mlp.gate_proj
+model.layers.21.mlp.up_proj
+model.layers.21.mlp.down_proj
+model.layers.21.mlp.act_fn
+model.layers.21.input_layernorm
+model.layers.21.post_attention_layernorm
+model.layers.22
+model.layers.22.self_attn
+model.layers.22.self_attn.q_proj
+model.layers.22.self_attn.k_proj
+model.layers.22.self_attn.v_proj
+model.layers.22.self_attn.o_proj
+model.layers.22.self_attn.rotary_emb
+model.layers.22.mlp
+model.layers.22.mlp.gate_proj
+model.layers.22.mlp.up_proj
+model.layers.22.mlp.down_proj
+model.layers.22.mlp.act_fn
+model.layers.22.input_layernorm
+model.layers.22.post_attention_layernorm
+model.layers.23
+model.layers.23.self_attn
+model.layers.23.self_attn.q_proj
+model.layers.23.self_attn.k_proj
+model.layers.23.self_attn.v_proj
+model.layers.23.self_attn.o_proj
+model.layers.23.self_attn.rotary_emb
+model.layers.23.mlp
+model.layers.23.mlp.gate_proj
+model.layers.23.mlp.up_proj
+model.layers.23.mlp.down_proj
+model.layers.23.mlp.act_fn
+model.layers.23.input_layernorm
+model.layers.23.post_attention_layernorm
+model.layers.24
+model.layers.24.self_attn
+model.layers.24.self_attn.q_proj
+model.layers.24.self_attn.k_proj
+model.layers.24.self_attn.v_proj
+model.layers.24.self_attn.o_proj
+model.layers.24.self_attn.rotary_emb
+model.layers.24.mlp
+model.layers.24.mlp.gate_proj
+model.layers.24.mlp.up_proj
+model.layers.24.mlp.down_proj
+model.layers.24.mlp.act_fn
+model.layers.24.input_layernorm
+model.layers.24.post_attention_layernorm
+model.layers.25
+model.layers.25.self_attn
+model.layers.25.self_attn.q_proj
+model.layers.25.self_attn.k_proj
+model.layers.25.self_attn.v_proj
+model.layers.25.self_attn.o_proj
+model.layers.25.self_attn.rotary_emb
+model.layers.25.mlp
+model.layers.25.mlp.gate_proj
+model.layers.25.mlp.up_proj
+model.layers.25.mlp.down_proj
+model.layers.25.mlp.act_fn
+model.layers.25.input_layernorm
+model.layers.25.post_attention_layernorm
+model.layers.26
+model.layers.26.self_attn
+model.layers.26.self_attn.q_proj
+model.layers.26.self_attn.k_proj
+model.layers.26.self_attn.v_proj
+model.layers.26.self_attn.o_proj
+model.layers.26.self_attn.rotary_emb
+model.layers.26.mlp
+model.layers.26.mlp.gate_proj
+model.layers.26.mlp.up_proj
+model.layers.26.mlp.down_proj
+model.layers.26.mlp.act_fn
+model.layers.26.input_layernorm
+model.layers.26.post_attention_layernorm
+model.layers.27
+model.layers.27.self_attn
+model.layers.27.self_attn.q_proj
+model.layers.27.self_attn.k_proj
+model.layers.27.self_attn.v_proj
+model.layers.27.self_attn.o_proj
+model.layers.27.self_attn.rotary_emb
+model.layers.27.mlp
+model.layers.27.mlp.gate_proj
+model.layers.27.mlp.up_proj
+model.layers.27.mlp.down_proj
+model.layers.27.mlp.act_fn
+model.layers.27.input_layernorm
+model.layers.27.post_attention_layernorm
+model.norm
+model.rotary_emb
+lm_head
diff --git a/logs/Salesforce/blip2-opt-2.7b.txt b/logs/Salesforce/blip2-opt-2.7b.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f7497d602aab0616712063d343c3ceb6de0f1af8
--- /dev/null
+++ b/logs/Salesforce/blip2-opt-2.7b.txt
@@ -0,0 +1,1078 @@
+
+vision_model
+vision_model.embeddings
+vision_model.embeddings.patch_embedding
+vision_model.encoder
+vision_model.encoder.layers
+vision_model.encoder.layers.0
+vision_model.encoder.layers.0.self_attn
+vision_model.encoder.layers.0.self_attn.dropout
+vision_model.encoder.layers.0.self_attn.qkv
+vision_model.encoder.layers.0.self_attn.projection
+vision_model.encoder.layers.0.layer_norm1
+vision_model.encoder.layers.0.mlp
+vision_model.encoder.layers.0.mlp.activation_fn
+vision_model.encoder.layers.0.mlp.fc1
+vision_model.encoder.layers.0.mlp.fc2
+vision_model.encoder.layers.0.layer_norm2
+vision_model.encoder.layers.1
+vision_model.encoder.layers.1.self_attn
+vision_model.encoder.layers.1.self_attn.dropout
+vision_model.encoder.layers.1.self_attn.qkv
+vision_model.encoder.layers.1.self_attn.projection
+vision_model.encoder.layers.1.layer_norm1
+vision_model.encoder.layers.1.mlp
+vision_model.encoder.layers.1.mlp.activation_fn
+vision_model.encoder.layers.1.mlp.fc1
+vision_model.encoder.layers.1.mlp.fc2
+vision_model.encoder.layers.1.layer_norm2
+vision_model.encoder.layers.2
+vision_model.encoder.layers.2.self_attn
+vision_model.encoder.layers.2.self_attn.dropout
+vision_model.encoder.layers.2.self_attn.qkv
+vision_model.encoder.layers.2.self_attn.projection
+vision_model.encoder.layers.2.layer_norm1
+vision_model.encoder.layers.2.mlp
+vision_model.encoder.layers.2.mlp.activation_fn
+vision_model.encoder.layers.2.mlp.fc1
+vision_model.encoder.layers.2.mlp.fc2
+vision_model.encoder.layers.2.layer_norm2
+vision_model.encoder.layers.3
+vision_model.encoder.layers.3.self_attn
+vision_model.encoder.layers.3.self_attn.dropout
+vision_model.encoder.layers.3.self_attn.qkv
+vision_model.encoder.layers.3.self_attn.projection
+vision_model.encoder.layers.3.layer_norm1
+vision_model.encoder.layers.3.mlp
+vision_model.encoder.layers.3.mlp.activation_fn
+vision_model.encoder.layers.3.mlp.fc1
+vision_model.encoder.layers.3.mlp.fc2
+vision_model.encoder.layers.3.layer_norm2
+vision_model.encoder.layers.4
+vision_model.encoder.layers.4.self_attn
+vision_model.encoder.layers.4.self_attn.dropout
+vision_model.encoder.layers.4.self_attn.qkv
+vision_model.encoder.layers.4.self_attn.projection
+vision_model.encoder.layers.4.layer_norm1
+vision_model.encoder.layers.4.mlp
+vision_model.encoder.layers.4.mlp.activation_fn
+vision_model.encoder.layers.4.mlp.fc1
+vision_model.encoder.layers.4.mlp.fc2
+vision_model.encoder.layers.4.layer_norm2
+vision_model.encoder.layers.5
+vision_model.encoder.layers.5.self_attn
+vision_model.encoder.layers.5.self_attn.dropout
+vision_model.encoder.layers.5.self_attn.qkv
+vision_model.encoder.layers.5.self_attn.projection
+vision_model.encoder.layers.5.layer_norm1
+vision_model.encoder.layers.5.mlp
+vision_model.encoder.layers.5.mlp.activation_fn
+vision_model.encoder.layers.5.mlp.fc1
+vision_model.encoder.layers.5.mlp.fc2
+vision_model.encoder.layers.5.layer_norm2
+vision_model.encoder.layers.6
+vision_model.encoder.layers.6.self_attn
+vision_model.encoder.layers.6.self_attn.dropout
+vision_model.encoder.layers.6.self_attn.qkv
+vision_model.encoder.layers.6.self_attn.projection
+vision_model.encoder.layers.6.layer_norm1
+vision_model.encoder.layers.6.mlp
+vision_model.encoder.layers.6.mlp.activation_fn
+vision_model.encoder.layers.6.mlp.fc1
+vision_model.encoder.layers.6.mlp.fc2
+vision_model.encoder.layers.6.layer_norm2
+vision_model.encoder.layers.7
+vision_model.encoder.layers.7.self_attn
+vision_model.encoder.layers.7.self_attn.dropout
+vision_model.encoder.layers.7.self_attn.qkv
+vision_model.encoder.layers.7.self_attn.projection
+vision_model.encoder.layers.7.layer_norm1
+vision_model.encoder.layers.7.mlp
+vision_model.encoder.layers.7.mlp.activation_fn
+vision_model.encoder.layers.7.mlp.fc1
+vision_model.encoder.layers.7.mlp.fc2
+vision_model.encoder.layers.7.layer_norm2
+vision_model.encoder.layers.8
+vision_model.encoder.layers.8.self_attn
+vision_model.encoder.layers.8.self_attn.dropout
+vision_model.encoder.layers.8.self_attn.qkv
+vision_model.encoder.layers.8.self_attn.projection
+vision_model.encoder.layers.8.layer_norm1
+vision_model.encoder.layers.8.mlp
+vision_model.encoder.layers.8.mlp.activation_fn
+vision_model.encoder.layers.8.mlp.fc1
+vision_model.encoder.layers.8.mlp.fc2
+vision_model.encoder.layers.8.layer_norm2
+vision_model.encoder.layers.9
+vision_model.encoder.layers.9.self_attn
+vision_model.encoder.layers.9.self_attn.dropout
+vision_model.encoder.layers.9.self_attn.qkv
+vision_model.encoder.layers.9.self_attn.projection
+vision_model.encoder.layers.9.layer_norm1
+vision_model.encoder.layers.9.mlp
+vision_model.encoder.layers.9.mlp.activation_fn
+vision_model.encoder.layers.9.mlp.fc1
+vision_model.encoder.layers.9.mlp.fc2
+vision_model.encoder.layers.9.layer_norm2
+vision_model.encoder.layers.10
+vision_model.encoder.layers.10.self_attn
+vision_model.encoder.layers.10.self_attn.dropout
+vision_model.encoder.layers.10.self_attn.qkv
+vision_model.encoder.layers.10.self_attn.projection
+vision_model.encoder.layers.10.layer_norm1
+vision_model.encoder.layers.10.mlp
+vision_model.encoder.layers.10.mlp.activation_fn
+vision_model.encoder.layers.10.mlp.fc1
+vision_model.encoder.layers.10.mlp.fc2
+vision_model.encoder.layers.10.layer_norm2
+vision_model.encoder.layers.11
+vision_model.encoder.layers.11.self_attn
+vision_model.encoder.layers.11.self_attn.dropout
+vision_model.encoder.layers.11.self_attn.qkv
+vision_model.encoder.layers.11.self_attn.projection
+vision_model.encoder.layers.11.layer_norm1
+vision_model.encoder.layers.11.mlp
+vision_model.encoder.layers.11.mlp.activation_fn
+vision_model.encoder.layers.11.mlp.fc1
+vision_model.encoder.layers.11.mlp.fc2
+vision_model.encoder.layers.11.layer_norm2
+vision_model.encoder.layers.12
+vision_model.encoder.layers.12.self_attn
+vision_model.encoder.layers.12.self_attn.dropout
+vision_model.encoder.layers.12.self_attn.qkv
+vision_model.encoder.layers.12.self_attn.projection
+vision_model.encoder.layers.12.layer_norm1
+vision_model.encoder.layers.12.mlp
+vision_model.encoder.layers.12.mlp.activation_fn
+vision_model.encoder.layers.12.mlp.fc1
+vision_model.encoder.layers.12.mlp.fc2
+vision_model.encoder.layers.12.layer_norm2
+vision_model.encoder.layers.13
+vision_model.encoder.layers.13.self_attn
+vision_model.encoder.layers.13.self_attn.dropout
+vision_model.encoder.layers.13.self_attn.qkv
+vision_model.encoder.layers.13.self_attn.projection
+vision_model.encoder.layers.13.layer_norm1
+vision_model.encoder.layers.13.mlp
+vision_model.encoder.layers.13.mlp.activation_fn
+vision_model.encoder.layers.13.mlp.fc1
+vision_model.encoder.layers.13.mlp.fc2
+vision_model.encoder.layers.13.layer_norm2
+vision_model.encoder.layers.14
+vision_model.encoder.layers.14.self_attn
+vision_model.encoder.layers.14.self_attn.dropout
+vision_model.encoder.layers.14.self_attn.qkv
+vision_model.encoder.layers.14.self_attn.projection
+vision_model.encoder.layers.14.layer_norm1
+vision_model.encoder.layers.14.mlp
+vision_model.encoder.layers.14.mlp.activation_fn
+vision_model.encoder.layers.14.mlp.fc1
+vision_model.encoder.layers.14.mlp.fc2
+vision_model.encoder.layers.14.layer_norm2
+vision_model.encoder.layers.15
+vision_model.encoder.layers.15.self_attn
+vision_model.encoder.layers.15.self_attn.dropout
+vision_model.encoder.layers.15.self_attn.qkv
+vision_model.encoder.layers.15.self_attn.projection
+vision_model.encoder.layers.15.layer_norm1
+vision_model.encoder.layers.15.mlp
+vision_model.encoder.layers.15.mlp.activation_fn
+vision_model.encoder.layers.15.mlp.fc1
+vision_model.encoder.layers.15.mlp.fc2
+vision_model.encoder.layers.15.layer_norm2
+vision_model.encoder.layers.16
+vision_model.encoder.layers.16.self_attn
+vision_model.encoder.layers.16.self_attn.dropout
+vision_model.encoder.layers.16.self_attn.qkv
+vision_model.encoder.layers.16.self_attn.projection
+vision_model.encoder.layers.16.layer_norm1
+vision_model.encoder.layers.16.mlp
+vision_model.encoder.layers.16.mlp.activation_fn
+vision_model.encoder.layers.16.mlp.fc1
+vision_model.encoder.layers.16.mlp.fc2
+vision_model.encoder.layers.16.layer_norm2
+vision_model.encoder.layers.17
+vision_model.encoder.layers.17.self_attn
+vision_model.encoder.layers.17.self_attn.dropout
+vision_model.encoder.layers.17.self_attn.qkv
+vision_model.encoder.layers.17.self_attn.projection
+vision_model.encoder.layers.17.layer_norm1
+vision_model.encoder.layers.17.mlp
+vision_model.encoder.layers.17.mlp.activation_fn
+vision_model.encoder.layers.17.mlp.fc1
+vision_model.encoder.layers.17.mlp.fc2
+vision_model.encoder.layers.17.layer_norm2
+vision_model.encoder.layers.18
+vision_model.encoder.layers.18.self_attn
+vision_model.encoder.layers.18.self_attn.dropout
+vision_model.encoder.layers.18.self_attn.qkv
+vision_model.encoder.layers.18.self_attn.projection
+vision_model.encoder.layers.18.layer_norm1
+vision_model.encoder.layers.18.mlp
+vision_model.encoder.layers.18.mlp.activation_fn
+vision_model.encoder.layers.18.mlp.fc1
+vision_model.encoder.layers.18.mlp.fc2
+vision_model.encoder.layers.18.layer_norm2
+vision_model.encoder.layers.19
+vision_model.encoder.layers.19.self_attn
+vision_model.encoder.layers.19.self_attn.dropout
+vision_model.encoder.layers.19.self_attn.qkv
+vision_model.encoder.layers.19.self_attn.projection
+vision_model.encoder.layers.19.layer_norm1
+vision_model.encoder.layers.19.mlp
+vision_model.encoder.layers.19.mlp.activation_fn
+vision_model.encoder.layers.19.mlp.fc1
+vision_model.encoder.layers.19.mlp.fc2
+vision_model.encoder.layers.19.layer_norm2
+vision_model.encoder.layers.20
+vision_model.encoder.layers.20.self_attn
+vision_model.encoder.layers.20.self_attn.dropout
+vision_model.encoder.layers.20.self_attn.qkv
+vision_model.encoder.layers.20.self_attn.projection
+vision_model.encoder.layers.20.layer_norm1
+vision_model.encoder.layers.20.mlp
+vision_model.encoder.layers.20.mlp.activation_fn
+vision_model.encoder.layers.20.mlp.fc1
+vision_model.encoder.layers.20.mlp.fc2
+vision_model.encoder.layers.20.layer_norm2
+vision_model.encoder.layers.21
+vision_model.encoder.layers.21.self_attn
+vision_model.encoder.layers.21.self_attn.dropout
+vision_model.encoder.layers.21.self_attn.qkv
+vision_model.encoder.layers.21.self_attn.projection
+vision_model.encoder.layers.21.layer_norm1
+vision_model.encoder.layers.21.mlp
+vision_model.encoder.layers.21.mlp.activation_fn
+vision_model.encoder.layers.21.mlp.fc1
+vision_model.encoder.layers.21.mlp.fc2
+vision_model.encoder.layers.21.layer_norm2
+vision_model.encoder.layers.22
+vision_model.encoder.layers.22.self_attn
+vision_model.encoder.layers.22.self_attn.dropout
+vision_model.encoder.layers.22.self_attn.qkv
+vision_model.encoder.layers.22.self_attn.projection
+vision_model.encoder.layers.22.layer_norm1
+vision_model.encoder.layers.22.mlp
+vision_model.encoder.layers.22.mlp.activation_fn
+vision_model.encoder.layers.22.mlp.fc1
+vision_model.encoder.layers.22.mlp.fc2
+vision_model.encoder.layers.22.layer_norm2
+vision_model.encoder.layers.23
+vision_model.encoder.layers.23.self_attn
+vision_model.encoder.layers.23.self_attn.dropout
+vision_model.encoder.layers.23.self_attn.qkv
+vision_model.encoder.layers.23.self_attn.projection
+vision_model.encoder.layers.23.layer_norm1
+vision_model.encoder.layers.23.mlp
+vision_model.encoder.layers.23.mlp.activation_fn
+vision_model.encoder.layers.23.mlp.fc1
+vision_model.encoder.layers.23.mlp.fc2
+vision_model.encoder.layers.23.layer_norm2
+vision_model.encoder.layers.24
+vision_model.encoder.layers.24.self_attn
+vision_model.encoder.layers.24.self_attn.dropout
+vision_model.encoder.layers.24.self_attn.qkv
+vision_model.encoder.layers.24.self_attn.projection
+vision_model.encoder.layers.24.layer_norm1
+vision_model.encoder.layers.24.mlp
+vision_model.encoder.layers.24.mlp.activation_fn
+vision_model.encoder.layers.24.mlp.fc1
+vision_model.encoder.layers.24.mlp.fc2
+vision_model.encoder.layers.24.layer_norm2
+vision_model.encoder.layers.25
+vision_model.encoder.layers.25.self_attn
+vision_model.encoder.layers.25.self_attn.dropout
+vision_model.encoder.layers.25.self_attn.qkv
+vision_model.encoder.layers.25.self_attn.projection
+vision_model.encoder.layers.25.layer_norm1
+vision_model.encoder.layers.25.mlp
+vision_model.encoder.layers.25.mlp.activation_fn
+vision_model.encoder.layers.25.mlp.fc1
+vision_model.encoder.layers.25.mlp.fc2
+vision_model.encoder.layers.25.layer_norm2
+vision_model.encoder.layers.26
+vision_model.encoder.layers.26.self_attn
+vision_model.encoder.layers.26.self_attn.dropout
+vision_model.encoder.layers.26.self_attn.qkv
+vision_model.encoder.layers.26.self_attn.projection
+vision_model.encoder.layers.26.layer_norm1
+vision_model.encoder.layers.26.mlp
+vision_model.encoder.layers.26.mlp.activation_fn
+vision_model.encoder.layers.26.mlp.fc1
+vision_model.encoder.layers.26.mlp.fc2
+vision_model.encoder.layers.26.layer_norm2
+vision_model.encoder.layers.27
+vision_model.encoder.layers.27.self_attn
+vision_model.encoder.layers.27.self_attn.dropout
+vision_model.encoder.layers.27.self_attn.qkv
+vision_model.encoder.layers.27.self_attn.projection
+vision_model.encoder.layers.27.layer_norm1
+vision_model.encoder.layers.27.mlp
+vision_model.encoder.layers.27.mlp.activation_fn
+vision_model.encoder.layers.27.mlp.fc1
+vision_model.encoder.layers.27.mlp.fc2
+vision_model.encoder.layers.27.layer_norm2
+vision_model.encoder.layers.28
+vision_model.encoder.layers.28.self_attn
+vision_model.encoder.layers.28.self_attn.dropout
+vision_model.encoder.layers.28.self_attn.qkv
+vision_model.encoder.layers.28.self_attn.projection
+vision_model.encoder.layers.28.layer_norm1
+vision_model.encoder.layers.28.mlp
+vision_model.encoder.layers.28.mlp.activation_fn
+vision_model.encoder.layers.28.mlp.fc1
+vision_model.encoder.layers.28.mlp.fc2
+vision_model.encoder.layers.28.layer_norm2
+vision_model.encoder.layers.29
+vision_model.encoder.layers.29.self_attn
+vision_model.encoder.layers.29.self_attn.dropout
+vision_model.encoder.layers.29.self_attn.qkv
+vision_model.encoder.layers.29.self_attn.projection
+vision_model.encoder.layers.29.layer_norm1
+vision_model.encoder.layers.29.mlp
+vision_model.encoder.layers.29.mlp.activation_fn
+vision_model.encoder.layers.29.mlp.fc1
+vision_model.encoder.layers.29.mlp.fc2
+vision_model.encoder.layers.29.layer_norm2
+vision_model.encoder.layers.30
+vision_model.encoder.layers.30.self_attn
+vision_model.encoder.layers.30.self_attn.dropout
+vision_model.encoder.layers.30.self_attn.qkv
+vision_model.encoder.layers.30.self_attn.projection
+vision_model.encoder.layers.30.layer_norm1
+vision_model.encoder.layers.30.mlp
+vision_model.encoder.layers.30.mlp.activation_fn
+vision_model.encoder.layers.30.mlp.fc1
+vision_model.encoder.layers.30.mlp.fc2
+vision_model.encoder.layers.30.layer_norm2
+vision_model.encoder.layers.31
+vision_model.encoder.layers.31.self_attn
+vision_model.encoder.layers.31.self_attn.dropout
+vision_model.encoder.layers.31.self_attn.qkv
+vision_model.encoder.layers.31.self_attn.projection
+vision_model.encoder.layers.31.layer_norm1
+vision_model.encoder.layers.31.mlp
+vision_model.encoder.layers.31.mlp.activation_fn
+vision_model.encoder.layers.31.mlp.fc1
+vision_model.encoder.layers.31.mlp.fc2
+vision_model.encoder.layers.31.layer_norm2
+vision_model.encoder.layers.32
+vision_model.encoder.layers.32.self_attn
+vision_model.encoder.layers.32.self_attn.dropout
+vision_model.encoder.layers.32.self_attn.qkv
+vision_model.encoder.layers.32.self_attn.projection
+vision_model.encoder.layers.32.layer_norm1
+vision_model.encoder.layers.32.mlp
+vision_model.encoder.layers.32.mlp.activation_fn
+vision_model.encoder.layers.32.mlp.fc1
+vision_model.encoder.layers.32.mlp.fc2
+vision_model.encoder.layers.32.layer_norm2
+vision_model.encoder.layers.33
+vision_model.encoder.layers.33.self_attn
+vision_model.encoder.layers.33.self_attn.dropout
+vision_model.encoder.layers.33.self_attn.qkv
+vision_model.encoder.layers.33.self_attn.projection
+vision_model.encoder.layers.33.layer_norm1
+vision_model.encoder.layers.33.mlp
+vision_model.encoder.layers.33.mlp.activation_fn
+vision_model.encoder.layers.33.mlp.fc1
+vision_model.encoder.layers.33.mlp.fc2
+vision_model.encoder.layers.33.layer_norm2
+vision_model.encoder.layers.34
+vision_model.encoder.layers.34.self_attn
+vision_model.encoder.layers.34.self_attn.dropout
+vision_model.encoder.layers.34.self_attn.qkv
+vision_model.encoder.layers.34.self_attn.projection
+vision_model.encoder.layers.34.layer_norm1
+vision_model.encoder.layers.34.mlp
+vision_model.encoder.layers.34.mlp.activation_fn
+vision_model.encoder.layers.34.mlp.fc1
+vision_model.encoder.layers.34.mlp.fc2
+vision_model.encoder.layers.34.layer_norm2
+vision_model.encoder.layers.35
+vision_model.encoder.layers.35.self_attn
+vision_model.encoder.layers.35.self_attn.dropout
+vision_model.encoder.layers.35.self_attn.qkv
+vision_model.encoder.layers.35.self_attn.projection
+vision_model.encoder.layers.35.layer_norm1
+vision_model.encoder.layers.35.mlp
+vision_model.encoder.layers.35.mlp.activation_fn
+vision_model.encoder.layers.35.mlp.fc1
+vision_model.encoder.layers.35.mlp.fc2
+vision_model.encoder.layers.35.layer_norm2
+vision_model.encoder.layers.36
+vision_model.encoder.layers.36.self_attn
+vision_model.encoder.layers.36.self_attn.dropout
+vision_model.encoder.layers.36.self_attn.qkv
+vision_model.encoder.layers.36.self_attn.projection
+vision_model.encoder.layers.36.layer_norm1
+vision_model.encoder.layers.36.mlp
+vision_model.encoder.layers.36.mlp.activation_fn
+vision_model.encoder.layers.36.mlp.fc1
+vision_model.encoder.layers.36.mlp.fc2
+vision_model.encoder.layers.36.layer_norm2
+vision_model.encoder.layers.37
+vision_model.encoder.layers.37.self_attn
+vision_model.encoder.layers.37.self_attn.dropout
+vision_model.encoder.layers.37.self_attn.qkv
+vision_model.encoder.layers.37.self_attn.projection
+vision_model.encoder.layers.37.layer_norm1
+vision_model.encoder.layers.37.mlp
+vision_model.encoder.layers.37.mlp.activation_fn
+vision_model.encoder.layers.37.mlp.fc1
+vision_model.encoder.layers.37.mlp.fc2
+vision_model.encoder.layers.37.layer_norm2
+vision_model.encoder.layers.38
+vision_model.encoder.layers.38.self_attn
+vision_model.encoder.layers.38.self_attn.dropout
+vision_model.encoder.layers.38.self_attn.qkv
+vision_model.encoder.layers.38.self_attn.projection
+vision_model.encoder.layers.38.layer_norm1
+vision_model.encoder.layers.38.mlp
+vision_model.encoder.layers.38.mlp.activation_fn
+vision_model.encoder.layers.38.mlp.fc1
+vision_model.encoder.layers.38.mlp.fc2
+vision_model.encoder.layers.38.layer_norm2
+vision_model.post_layernorm
+qformer
+qformer.layernorm
+qformer.dropout
+qformer.encoder
+qformer.encoder.layer
+qformer.encoder.layer.0
+qformer.encoder.layer.0.attention
+qformer.encoder.layer.0.attention.attention
+qformer.encoder.layer.0.attention.attention.query
+qformer.encoder.layer.0.attention.attention.key
+qformer.encoder.layer.0.attention.attention.value
+qformer.encoder.layer.0.attention.attention.dropout
+qformer.encoder.layer.0.attention.output
+qformer.encoder.layer.0.attention.output.dense
+qformer.encoder.layer.0.attention.output.LayerNorm
+qformer.encoder.layer.0.attention.output.dropout
+qformer.encoder.layer.0.crossattention
+qformer.encoder.layer.0.crossattention.attention
+qformer.encoder.layer.0.crossattention.attention.query
+qformer.encoder.layer.0.crossattention.attention.key
+qformer.encoder.layer.0.crossattention.attention.value
+qformer.encoder.layer.0.crossattention.attention.dropout
+qformer.encoder.layer.0.crossattention.output
+qformer.encoder.layer.0.crossattention.output.dense
+qformer.encoder.layer.0.crossattention.output.LayerNorm
+qformer.encoder.layer.0.crossattention.output.dropout
+qformer.encoder.layer.0.intermediate_query
+qformer.encoder.layer.0.intermediate_query.dense
+qformer.encoder.layer.0.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.0.output_query
+qformer.encoder.layer.0.output_query.dense
+qformer.encoder.layer.0.output_query.LayerNorm
+qformer.encoder.layer.0.output_query.dropout
+qformer.encoder.layer.1
+qformer.encoder.layer.1.attention
+qformer.encoder.layer.1.attention.attention
+qformer.encoder.layer.1.attention.attention.query
+qformer.encoder.layer.1.attention.attention.key
+qformer.encoder.layer.1.attention.attention.value
+qformer.encoder.layer.1.attention.attention.dropout
+qformer.encoder.layer.1.attention.output
+qformer.encoder.layer.1.attention.output.dense
+qformer.encoder.layer.1.attention.output.LayerNorm
+qformer.encoder.layer.1.attention.output.dropout
+qformer.encoder.layer.1.intermediate_query
+qformer.encoder.layer.1.intermediate_query.dense
+qformer.encoder.layer.1.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.1.output_query
+qformer.encoder.layer.1.output_query.dense
+qformer.encoder.layer.1.output_query.LayerNorm
+qformer.encoder.layer.1.output_query.dropout
+qformer.encoder.layer.2
+qformer.encoder.layer.2.attention
+qformer.encoder.layer.2.attention.attention
+qformer.encoder.layer.2.attention.attention.query
+qformer.encoder.layer.2.attention.attention.key
+qformer.encoder.layer.2.attention.attention.value
+qformer.encoder.layer.2.attention.attention.dropout
+qformer.encoder.layer.2.attention.output
+qformer.encoder.layer.2.attention.output.dense
+qformer.encoder.layer.2.attention.output.LayerNorm
+qformer.encoder.layer.2.attention.output.dropout
+qformer.encoder.layer.2.crossattention
+qformer.encoder.layer.2.crossattention.attention
+qformer.encoder.layer.2.crossattention.attention.query
+qformer.encoder.layer.2.crossattention.attention.key
+qformer.encoder.layer.2.crossattention.attention.value
+qformer.encoder.layer.2.crossattention.attention.dropout
+qformer.encoder.layer.2.crossattention.output
+qformer.encoder.layer.2.crossattention.output.dense
+qformer.encoder.layer.2.crossattention.output.LayerNorm
+qformer.encoder.layer.2.crossattention.output.dropout
+qformer.encoder.layer.2.intermediate_query
+qformer.encoder.layer.2.intermediate_query.dense
+qformer.encoder.layer.2.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.2.output_query
+qformer.encoder.layer.2.output_query.dense
+qformer.encoder.layer.2.output_query.LayerNorm
+qformer.encoder.layer.2.output_query.dropout
+qformer.encoder.layer.3
+qformer.encoder.layer.3.attention
+qformer.encoder.layer.3.attention.attention
+qformer.encoder.layer.3.attention.attention.query
+qformer.encoder.layer.3.attention.attention.key
+qformer.encoder.layer.3.attention.attention.value
+qformer.encoder.layer.3.attention.attention.dropout
+qformer.encoder.layer.3.attention.output
+qformer.encoder.layer.3.attention.output.dense
+qformer.encoder.layer.3.attention.output.LayerNorm
+qformer.encoder.layer.3.attention.output.dropout
+qformer.encoder.layer.3.intermediate_query
+qformer.encoder.layer.3.intermediate_query.dense
+qformer.encoder.layer.3.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.3.output_query
+qformer.encoder.layer.3.output_query.dense
+qformer.encoder.layer.3.output_query.LayerNorm
+qformer.encoder.layer.3.output_query.dropout
+qformer.encoder.layer.4
+qformer.encoder.layer.4.attention
+qformer.encoder.layer.4.attention.attention
+qformer.encoder.layer.4.attention.attention.query
+qformer.encoder.layer.4.attention.attention.key
+qformer.encoder.layer.4.attention.attention.value
+qformer.encoder.layer.4.attention.attention.dropout
+qformer.encoder.layer.4.attention.output
+qformer.encoder.layer.4.attention.output.dense
+qformer.encoder.layer.4.attention.output.LayerNorm
+qformer.encoder.layer.4.attention.output.dropout
+qformer.encoder.layer.4.crossattention
+qformer.encoder.layer.4.crossattention.attention
+qformer.encoder.layer.4.crossattention.attention.query
+qformer.encoder.layer.4.crossattention.attention.key
+qformer.encoder.layer.4.crossattention.attention.value
+qformer.encoder.layer.4.crossattention.attention.dropout
+qformer.encoder.layer.4.crossattention.output
+qformer.encoder.layer.4.crossattention.output.dense
+qformer.encoder.layer.4.crossattention.output.LayerNorm
+qformer.encoder.layer.4.crossattention.output.dropout
+qformer.encoder.layer.4.intermediate_query
+qformer.encoder.layer.4.intermediate_query.dense
+qformer.encoder.layer.4.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.4.output_query
+qformer.encoder.layer.4.output_query.dense
+qformer.encoder.layer.4.output_query.LayerNorm
+qformer.encoder.layer.4.output_query.dropout
+qformer.encoder.layer.5
+qformer.encoder.layer.5.attention
+qformer.encoder.layer.5.attention.attention
+qformer.encoder.layer.5.attention.attention.query
+qformer.encoder.layer.5.attention.attention.key
+qformer.encoder.layer.5.attention.attention.value
+qformer.encoder.layer.5.attention.attention.dropout
+qformer.encoder.layer.5.attention.output
+qformer.encoder.layer.5.attention.output.dense
+qformer.encoder.layer.5.attention.output.LayerNorm
+qformer.encoder.layer.5.attention.output.dropout
+qformer.encoder.layer.5.intermediate_query
+qformer.encoder.layer.5.intermediate_query.dense
+qformer.encoder.layer.5.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.5.output_query
+qformer.encoder.layer.5.output_query.dense
+qformer.encoder.layer.5.output_query.LayerNorm
+qformer.encoder.layer.5.output_query.dropout
+qformer.encoder.layer.6
+qformer.encoder.layer.6.attention
+qformer.encoder.layer.6.attention.attention
+qformer.encoder.layer.6.attention.attention.query
+qformer.encoder.layer.6.attention.attention.key
+qformer.encoder.layer.6.attention.attention.value
+qformer.encoder.layer.6.attention.attention.dropout
+qformer.encoder.layer.6.attention.output
+qformer.encoder.layer.6.attention.output.dense
+qformer.encoder.layer.6.attention.output.LayerNorm
+qformer.encoder.layer.6.attention.output.dropout
+qformer.encoder.layer.6.crossattention
+qformer.encoder.layer.6.crossattention.attention
+qformer.encoder.layer.6.crossattention.attention.query
+qformer.encoder.layer.6.crossattention.attention.key
+qformer.encoder.layer.6.crossattention.attention.value
+qformer.encoder.layer.6.crossattention.attention.dropout
+qformer.encoder.layer.6.crossattention.output
+qformer.encoder.layer.6.crossattention.output.dense
+qformer.encoder.layer.6.crossattention.output.LayerNorm
+qformer.encoder.layer.6.crossattention.output.dropout
+qformer.encoder.layer.6.intermediate_query
+qformer.encoder.layer.6.intermediate_query.dense
+qformer.encoder.layer.6.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.6.output_query
+qformer.encoder.layer.6.output_query.dense
+qformer.encoder.layer.6.output_query.LayerNorm
+qformer.encoder.layer.6.output_query.dropout
+qformer.encoder.layer.7
+qformer.encoder.layer.7.attention
+qformer.encoder.layer.7.attention.attention
+qformer.encoder.layer.7.attention.attention.query
+qformer.encoder.layer.7.attention.attention.key
+qformer.encoder.layer.7.attention.attention.value
+qformer.encoder.layer.7.attention.attention.dropout
+qformer.encoder.layer.7.attention.output
+qformer.encoder.layer.7.attention.output.dense
+qformer.encoder.layer.7.attention.output.LayerNorm
+qformer.encoder.layer.7.attention.output.dropout
+qformer.encoder.layer.7.intermediate_query
+qformer.encoder.layer.7.intermediate_query.dense
+qformer.encoder.layer.7.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.7.output_query
+qformer.encoder.layer.7.output_query.dense
+qformer.encoder.layer.7.output_query.LayerNorm
+qformer.encoder.layer.7.output_query.dropout
+qformer.encoder.layer.8
+qformer.encoder.layer.8.attention
+qformer.encoder.layer.8.attention.attention
+qformer.encoder.layer.8.attention.attention.query
+qformer.encoder.layer.8.attention.attention.key
+qformer.encoder.layer.8.attention.attention.value
+qformer.encoder.layer.8.attention.attention.dropout
+qformer.encoder.layer.8.attention.output
+qformer.encoder.layer.8.attention.output.dense
+qformer.encoder.layer.8.attention.output.LayerNorm
+qformer.encoder.layer.8.attention.output.dropout
+qformer.encoder.layer.8.crossattention
+qformer.encoder.layer.8.crossattention.attention
+qformer.encoder.layer.8.crossattention.attention.query
+qformer.encoder.layer.8.crossattention.attention.key
+qformer.encoder.layer.8.crossattention.attention.value
+qformer.encoder.layer.8.crossattention.attention.dropout
+qformer.encoder.layer.8.crossattention.output
+qformer.encoder.layer.8.crossattention.output.dense
+qformer.encoder.layer.8.crossattention.output.LayerNorm
+qformer.encoder.layer.8.crossattention.output.dropout
+qformer.encoder.layer.8.intermediate_query
+qformer.encoder.layer.8.intermediate_query.dense
+qformer.encoder.layer.8.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.8.output_query
+qformer.encoder.layer.8.output_query.dense
+qformer.encoder.layer.8.output_query.LayerNorm
+qformer.encoder.layer.8.output_query.dropout
+qformer.encoder.layer.9
+qformer.encoder.layer.9.attention
+qformer.encoder.layer.9.attention.attention
+qformer.encoder.layer.9.attention.attention.query
+qformer.encoder.layer.9.attention.attention.key
+qformer.encoder.layer.9.attention.attention.value
+qformer.encoder.layer.9.attention.attention.dropout
+qformer.encoder.layer.9.attention.output
+qformer.encoder.layer.9.attention.output.dense
+qformer.encoder.layer.9.attention.output.LayerNorm
+qformer.encoder.layer.9.attention.output.dropout
+qformer.encoder.layer.9.intermediate_query
+qformer.encoder.layer.9.intermediate_query.dense
+qformer.encoder.layer.9.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.9.output_query
+qformer.encoder.layer.9.output_query.dense
+qformer.encoder.layer.9.output_query.LayerNorm
+qformer.encoder.layer.9.output_query.dropout
+qformer.encoder.layer.10
+qformer.encoder.layer.10.attention
+qformer.encoder.layer.10.attention.attention
+qformer.encoder.layer.10.attention.attention.query
+qformer.encoder.layer.10.attention.attention.key
+qformer.encoder.layer.10.attention.attention.value
+qformer.encoder.layer.10.attention.attention.dropout
+qformer.encoder.layer.10.attention.output
+qformer.encoder.layer.10.attention.output.dense
+qformer.encoder.layer.10.attention.output.LayerNorm
+qformer.encoder.layer.10.attention.output.dropout
+qformer.encoder.layer.10.crossattention
+qformer.encoder.layer.10.crossattention.attention
+qformer.encoder.layer.10.crossattention.attention.query
+qformer.encoder.layer.10.crossattention.attention.key
+qformer.encoder.layer.10.crossattention.attention.value
+qformer.encoder.layer.10.crossattention.attention.dropout
+qformer.encoder.layer.10.crossattention.output
+qformer.encoder.layer.10.crossattention.output.dense
+qformer.encoder.layer.10.crossattention.output.LayerNorm
+qformer.encoder.layer.10.crossattention.output.dropout
+qformer.encoder.layer.10.intermediate_query
+qformer.encoder.layer.10.intermediate_query.dense
+qformer.encoder.layer.10.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.10.output_query
+qformer.encoder.layer.10.output_query.dense
+qformer.encoder.layer.10.output_query.LayerNorm
+qformer.encoder.layer.10.output_query.dropout
+qformer.encoder.layer.11
+qformer.encoder.layer.11.attention
+qformer.encoder.layer.11.attention.attention
+qformer.encoder.layer.11.attention.attention.query
+qformer.encoder.layer.11.attention.attention.key
+qformer.encoder.layer.11.attention.attention.value
+qformer.encoder.layer.11.attention.attention.dropout
+qformer.encoder.layer.11.attention.output
+qformer.encoder.layer.11.attention.output.dense
+qformer.encoder.layer.11.attention.output.LayerNorm
+qformer.encoder.layer.11.attention.output.dropout
+qformer.encoder.layer.11.intermediate_query
+qformer.encoder.layer.11.intermediate_query.dense
+qformer.encoder.layer.11.intermediate_query.intermediate_act_fn
+qformer.encoder.layer.11.output_query
+qformer.encoder.layer.11.output_query.dense
+qformer.encoder.layer.11.output_query.LayerNorm
+qformer.encoder.layer.11.output_query.dropout
+language_projection
+language_model
+language_model.model
+language_model.model.decoder
+language_model.model.decoder.embed_tokens
+language_model.model.decoder.embed_positions
+language_model.model.decoder.final_layer_norm
+language_model.model.decoder.layers
+language_model.model.decoder.layers.0
+language_model.model.decoder.layers.0.self_attn
+language_model.model.decoder.layers.0.self_attn.k_proj
+language_model.model.decoder.layers.0.self_attn.v_proj
+language_model.model.decoder.layers.0.self_attn.q_proj
+language_model.model.decoder.layers.0.self_attn.out_proj
+language_model.model.decoder.layers.0.activation_fn
+language_model.model.decoder.layers.0.self_attn_layer_norm
+language_model.model.decoder.layers.0.fc1
+language_model.model.decoder.layers.0.fc2
+language_model.model.decoder.layers.0.final_layer_norm
+language_model.model.decoder.layers.1
+language_model.model.decoder.layers.1.self_attn
+language_model.model.decoder.layers.1.self_attn.k_proj
+language_model.model.decoder.layers.1.self_attn.v_proj
+language_model.model.decoder.layers.1.self_attn.q_proj
+language_model.model.decoder.layers.1.self_attn.out_proj
+language_model.model.decoder.layers.1.activation_fn
+language_model.model.decoder.layers.1.self_attn_layer_norm
+language_model.model.decoder.layers.1.fc1
+language_model.model.decoder.layers.1.fc2
+language_model.model.decoder.layers.1.final_layer_norm
+language_model.model.decoder.layers.2
+language_model.model.decoder.layers.2.self_attn
+language_model.model.decoder.layers.2.self_attn.k_proj
+language_model.model.decoder.layers.2.self_attn.v_proj
+language_model.model.decoder.layers.2.self_attn.q_proj
+language_model.model.decoder.layers.2.self_attn.out_proj
+language_model.model.decoder.layers.2.activation_fn
+language_model.model.decoder.layers.2.self_attn_layer_norm
+language_model.model.decoder.layers.2.fc1
+language_model.model.decoder.layers.2.fc2
+language_model.model.decoder.layers.2.final_layer_norm
+language_model.model.decoder.layers.3
+language_model.model.decoder.layers.3.self_attn
+language_model.model.decoder.layers.3.self_attn.k_proj
+language_model.model.decoder.layers.3.self_attn.v_proj
+language_model.model.decoder.layers.3.self_attn.q_proj
+language_model.model.decoder.layers.3.self_attn.out_proj
+language_model.model.decoder.layers.3.activation_fn
+language_model.model.decoder.layers.3.self_attn_layer_norm
+language_model.model.decoder.layers.3.fc1
+language_model.model.decoder.layers.3.fc2
+language_model.model.decoder.layers.3.final_layer_norm
+language_model.model.decoder.layers.4
+language_model.model.decoder.layers.4.self_attn
+language_model.model.decoder.layers.4.self_attn.k_proj
+language_model.model.decoder.layers.4.self_attn.v_proj
+language_model.model.decoder.layers.4.self_attn.q_proj
+language_model.model.decoder.layers.4.self_attn.out_proj
+language_model.model.decoder.layers.4.activation_fn
+language_model.model.decoder.layers.4.self_attn_layer_norm
+language_model.model.decoder.layers.4.fc1
+language_model.model.decoder.layers.4.fc2
+language_model.model.decoder.layers.4.final_layer_norm
+language_model.model.decoder.layers.5
+language_model.model.decoder.layers.5.self_attn
+language_model.model.decoder.layers.5.self_attn.k_proj
+language_model.model.decoder.layers.5.self_attn.v_proj
+language_model.model.decoder.layers.5.self_attn.q_proj
+language_model.model.decoder.layers.5.self_attn.out_proj
+language_model.model.decoder.layers.5.activation_fn
+language_model.model.decoder.layers.5.self_attn_layer_norm
+language_model.model.decoder.layers.5.fc1
+language_model.model.decoder.layers.5.fc2
+language_model.model.decoder.layers.5.final_layer_norm
+language_model.model.decoder.layers.6
+language_model.model.decoder.layers.6.self_attn
+language_model.model.decoder.layers.6.self_attn.k_proj
+language_model.model.decoder.layers.6.self_attn.v_proj
+language_model.model.decoder.layers.6.self_attn.q_proj
+language_model.model.decoder.layers.6.self_attn.out_proj
+language_model.model.decoder.layers.6.activation_fn
+language_model.model.decoder.layers.6.self_attn_layer_norm
+language_model.model.decoder.layers.6.fc1
+language_model.model.decoder.layers.6.fc2
+language_model.model.decoder.layers.6.final_layer_norm
+language_model.model.decoder.layers.7
+language_model.model.decoder.layers.7.self_attn
+language_model.model.decoder.layers.7.self_attn.k_proj
+language_model.model.decoder.layers.7.self_attn.v_proj
+language_model.model.decoder.layers.7.self_attn.q_proj
+language_model.model.decoder.layers.7.self_attn.out_proj
+language_model.model.decoder.layers.7.activation_fn
+language_model.model.decoder.layers.7.self_attn_layer_norm
+language_model.model.decoder.layers.7.fc1
+language_model.model.decoder.layers.7.fc2
+language_model.model.decoder.layers.7.final_layer_norm
+language_model.model.decoder.layers.8
+language_model.model.decoder.layers.8.self_attn
+language_model.model.decoder.layers.8.self_attn.k_proj
+language_model.model.decoder.layers.8.self_attn.v_proj
+language_model.model.decoder.layers.8.self_attn.q_proj
+language_model.model.decoder.layers.8.self_attn.out_proj
+language_model.model.decoder.layers.8.activation_fn
+language_model.model.decoder.layers.8.self_attn_layer_norm
+language_model.model.decoder.layers.8.fc1
+language_model.model.decoder.layers.8.fc2
+language_model.model.decoder.layers.8.final_layer_norm
+language_model.model.decoder.layers.9
+language_model.model.decoder.layers.9.self_attn
+language_model.model.decoder.layers.9.self_attn.k_proj
+language_model.model.decoder.layers.9.self_attn.v_proj
+language_model.model.decoder.layers.9.self_attn.q_proj
+language_model.model.decoder.layers.9.self_attn.out_proj
+language_model.model.decoder.layers.9.activation_fn
+language_model.model.decoder.layers.9.self_attn_layer_norm
+language_model.model.decoder.layers.9.fc1
+language_model.model.decoder.layers.9.fc2
+language_model.model.decoder.layers.9.final_layer_norm
+language_model.model.decoder.layers.10
+language_model.model.decoder.layers.10.self_attn
+language_model.model.decoder.layers.10.self_attn.k_proj
+language_model.model.decoder.layers.10.self_attn.v_proj
+language_model.model.decoder.layers.10.self_attn.q_proj
+language_model.model.decoder.layers.10.self_attn.out_proj
+language_model.model.decoder.layers.10.activation_fn
+language_model.model.decoder.layers.10.self_attn_layer_norm
+language_model.model.decoder.layers.10.fc1
+language_model.model.decoder.layers.10.fc2
+language_model.model.decoder.layers.10.final_layer_norm
+language_model.model.decoder.layers.11
+language_model.model.decoder.layers.11.self_attn
+language_model.model.decoder.layers.11.self_attn.k_proj
+language_model.model.decoder.layers.11.self_attn.v_proj
+language_model.model.decoder.layers.11.self_attn.q_proj
+language_model.model.decoder.layers.11.self_attn.out_proj
+language_model.model.decoder.layers.11.activation_fn
+language_model.model.decoder.layers.11.self_attn_layer_norm
+language_model.model.decoder.layers.11.fc1
+language_model.model.decoder.layers.11.fc2
+language_model.model.decoder.layers.11.final_layer_norm
+language_model.model.decoder.layers.12
+language_model.model.decoder.layers.12.self_attn
+language_model.model.decoder.layers.12.self_attn.k_proj
+language_model.model.decoder.layers.12.self_attn.v_proj
+language_model.model.decoder.layers.12.self_attn.q_proj
+language_model.model.decoder.layers.12.self_attn.out_proj
+language_model.model.decoder.layers.12.activation_fn
+language_model.model.decoder.layers.12.self_attn_layer_norm
+language_model.model.decoder.layers.12.fc1
+language_model.model.decoder.layers.12.fc2
+language_model.model.decoder.layers.12.final_layer_norm
+language_model.model.decoder.layers.13
+language_model.model.decoder.layers.13.self_attn
+language_model.model.decoder.layers.13.self_attn.k_proj
+language_model.model.decoder.layers.13.self_attn.v_proj
+language_model.model.decoder.layers.13.self_attn.q_proj
+language_model.model.decoder.layers.13.self_attn.out_proj
+language_model.model.decoder.layers.13.activation_fn
+language_model.model.decoder.layers.13.self_attn_layer_norm
+language_model.model.decoder.layers.13.fc1
+language_model.model.decoder.layers.13.fc2
+language_model.model.decoder.layers.13.final_layer_norm
+language_model.model.decoder.layers.14
+language_model.model.decoder.layers.14.self_attn
+language_model.model.decoder.layers.14.self_attn.k_proj
+language_model.model.decoder.layers.14.self_attn.v_proj
+language_model.model.decoder.layers.14.self_attn.q_proj
+language_model.model.decoder.layers.14.self_attn.out_proj
+language_model.model.decoder.layers.14.activation_fn
+language_model.model.decoder.layers.14.self_attn_layer_norm
+language_model.model.decoder.layers.14.fc1
+language_model.model.decoder.layers.14.fc2
+language_model.model.decoder.layers.14.final_layer_norm
+language_model.model.decoder.layers.15
+language_model.model.decoder.layers.15.self_attn
+language_model.model.decoder.layers.15.self_attn.k_proj
+language_model.model.decoder.layers.15.self_attn.v_proj
+language_model.model.decoder.layers.15.self_attn.q_proj
+language_model.model.decoder.layers.15.self_attn.out_proj
+language_model.model.decoder.layers.15.activation_fn
+language_model.model.decoder.layers.15.self_attn_layer_norm
+language_model.model.decoder.layers.15.fc1
+language_model.model.decoder.layers.15.fc2
+language_model.model.decoder.layers.15.final_layer_norm
+language_model.model.decoder.layers.16
+language_model.model.decoder.layers.16.self_attn
+language_model.model.decoder.layers.16.self_attn.k_proj
+language_model.model.decoder.layers.16.self_attn.v_proj
+language_model.model.decoder.layers.16.self_attn.q_proj
+language_model.model.decoder.layers.16.self_attn.out_proj
+language_model.model.decoder.layers.16.activation_fn
+language_model.model.decoder.layers.16.self_attn_layer_norm
+language_model.model.decoder.layers.16.fc1
+language_model.model.decoder.layers.16.fc2
+language_model.model.decoder.layers.16.final_layer_norm
+language_model.model.decoder.layers.17
+language_model.model.decoder.layers.17.self_attn
+language_model.model.decoder.layers.17.self_attn.k_proj
+language_model.model.decoder.layers.17.self_attn.v_proj
+language_model.model.decoder.layers.17.self_attn.q_proj
+language_model.model.decoder.layers.17.self_attn.out_proj
+language_model.model.decoder.layers.17.activation_fn
+language_model.model.decoder.layers.17.self_attn_layer_norm
+language_model.model.decoder.layers.17.fc1
+language_model.model.decoder.layers.17.fc2
+language_model.model.decoder.layers.17.final_layer_norm
+language_model.model.decoder.layers.18
+language_model.model.decoder.layers.18.self_attn
+language_model.model.decoder.layers.18.self_attn.k_proj
+language_model.model.decoder.layers.18.self_attn.v_proj
+language_model.model.decoder.layers.18.self_attn.q_proj
+language_model.model.decoder.layers.18.self_attn.out_proj
+language_model.model.decoder.layers.18.activation_fn
+language_model.model.decoder.layers.18.self_attn_layer_norm
+language_model.model.decoder.layers.18.fc1
+language_model.model.decoder.layers.18.fc2
+language_model.model.decoder.layers.18.final_layer_norm
+language_model.model.decoder.layers.19
+language_model.model.decoder.layers.19.self_attn
+language_model.model.decoder.layers.19.self_attn.k_proj
+language_model.model.decoder.layers.19.self_attn.v_proj
+language_model.model.decoder.layers.19.self_attn.q_proj
+language_model.model.decoder.layers.19.self_attn.out_proj
+language_model.model.decoder.layers.19.activation_fn
+language_model.model.decoder.layers.19.self_attn_layer_norm
+language_model.model.decoder.layers.19.fc1
+language_model.model.decoder.layers.19.fc2
+language_model.model.decoder.layers.19.final_layer_norm
+language_model.model.decoder.layers.20
+language_model.model.decoder.layers.20.self_attn
+language_model.model.decoder.layers.20.self_attn.k_proj
+language_model.model.decoder.layers.20.self_attn.v_proj
+language_model.model.decoder.layers.20.self_attn.q_proj
+language_model.model.decoder.layers.20.self_attn.out_proj
+language_model.model.decoder.layers.20.activation_fn
+language_model.model.decoder.layers.20.self_attn_layer_norm
+language_model.model.decoder.layers.20.fc1
+language_model.model.decoder.layers.20.fc2
+language_model.model.decoder.layers.20.final_layer_norm
+language_model.model.decoder.layers.21
+language_model.model.decoder.layers.21.self_attn
+language_model.model.decoder.layers.21.self_attn.k_proj
+language_model.model.decoder.layers.21.self_attn.v_proj
+language_model.model.decoder.layers.21.self_attn.q_proj
+language_model.model.decoder.layers.21.self_attn.out_proj
+language_model.model.decoder.layers.21.activation_fn
+language_model.model.decoder.layers.21.self_attn_layer_norm
+language_model.model.decoder.layers.21.fc1
+language_model.model.decoder.layers.21.fc2
+language_model.model.decoder.layers.21.final_layer_norm
+language_model.model.decoder.layers.22
+language_model.model.decoder.layers.22.self_attn
+language_model.model.decoder.layers.22.self_attn.k_proj
+language_model.model.decoder.layers.22.self_attn.v_proj
+language_model.model.decoder.layers.22.self_attn.q_proj
+language_model.model.decoder.layers.22.self_attn.out_proj
+language_model.model.decoder.layers.22.activation_fn
+language_model.model.decoder.layers.22.self_attn_layer_norm
+language_model.model.decoder.layers.22.fc1
+language_model.model.decoder.layers.22.fc2
+language_model.model.decoder.layers.22.final_layer_norm
+language_model.model.decoder.layers.23
+language_model.model.decoder.layers.23.self_attn
+language_model.model.decoder.layers.23.self_attn.k_proj
+language_model.model.decoder.layers.23.self_attn.v_proj
+language_model.model.decoder.layers.23.self_attn.q_proj
+language_model.model.decoder.layers.23.self_attn.out_proj
+language_model.model.decoder.layers.23.activation_fn
+language_model.model.decoder.layers.23.self_attn_layer_norm
+language_model.model.decoder.layers.23.fc1
+language_model.model.decoder.layers.23.fc2
+language_model.model.decoder.layers.23.final_layer_norm
+language_model.model.decoder.layers.24
+language_model.model.decoder.layers.24.self_attn
+language_model.model.decoder.layers.24.self_attn.k_proj
+language_model.model.decoder.layers.24.self_attn.v_proj
+language_model.model.decoder.layers.24.self_attn.q_proj
+language_model.model.decoder.layers.24.self_attn.out_proj
+language_model.model.decoder.layers.24.activation_fn
+language_model.model.decoder.layers.24.self_attn_layer_norm
+language_model.model.decoder.layers.24.fc1
+language_model.model.decoder.layers.24.fc2
+language_model.model.decoder.layers.24.final_layer_norm
+language_model.model.decoder.layers.25
+language_model.model.decoder.layers.25.self_attn
+language_model.model.decoder.layers.25.self_attn.k_proj
+language_model.model.decoder.layers.25.self_attn.v_proj
+language_model.model.decoder.layers.25.self_attn.q_proj
+language_model.model.decoder.layers.25.self_attn.out_proj
+language_model.model.decoder.layers.25.activation_fn
+language_model.model.decoder.layers.25.self_attn_layer_norm
+language_model.model.decoder.layers.25.fc1
+language_model.model.decoder.layers.25.fc2
+language_model.model.decoder.layers.25.final_layer_norm
+language_model.model.decoder.layers.26
+language_model.model.decoder.layers.26.self_attn
+language_model.model.decoder.layers.26.self_attn.k_proj
+language_model.model.decoder.layers.26.self_attn.v_proj
+language_model.model.decoder.layers.26.self_attn.q_proj
+language_model.model.decoder.layers.26.self_attn.out_proj
+language_model.model.decoder.layers.26.activation_fn
+language_model.model.decoder.layers.26.self_attn_layer_norm
+language_model.model.decoder.layers.26.fc1
+language_model.model.decoder.layers.26.fc2
+language_model.model.decoder.layers.26.final_layer_norm
+language_model.model.decoder.layers.27
+language_model.model.decoder.layers.27.self_attn
+language_model.model.decoder.layers.27.self_attn.k_proj
+language_model.model.decoder.layers.27.self_attn.v_proj
+language_model.model.decoder.layers.27.self_attn.q_proj
+language_model.model.decoder.layers.27.self_attn.out_proj
+language_model.model.decoder.layers.27.activation_fn
+language_model.model.decoder.layers.27.self_attn_layer_norm
+language_model.model.decoder.layers.27.fc1
+language_model.model.decoder.layers.27.fc2
+language_model.model.decoder.layers.27.final_layer_norm
+language_model.model.decoder.layers.28
+language_model.model.decoder.layers.28.self_attn
+language_model.model.decoder.layers.28.self_attn.k_proj
+language_model.model.decoder.layers.28.self_attn.v_proj
+language_model.model.decoder.layers.28.self_attn.q_proj
+language_model.model.decoder.layers.28.self_attn.out_proj
+language_model.model.decoder.layers.28.activation_fn
+language_model.model.decoder.layers.28.self_attn_layer_norm
+language_model.model.decoder.layers.28.fc1
+language_model.model.decoder.layers.28.fc2
+language_model.model.decoder.layers.28.final_layer_norm
+language_model.model.decoder.layers.29
+language_model.model.decoder.layers.29.self_attn
+language_model.model.decoder.layers.29.self_attn.k_proj
+language_model.model.decoder.layers.29.self_attn.v_proj
+language_model.model.decoder.layers.29.self_attn.q_proj
+language_model.model.decoder.layers.29.self_attn.out_proj
+language_model.model.decoder.layers.29.activation_fn
+language_model.model.decoder.layers.29.self_attn_layer_norm
+language_model.model.decoder.layers.29.fc1
+language_model.model.decoder.layers.29.fc2
+language_model.model.decoder.layers.29.final_layer_norm
+language_model.model.decoder.layers.30
+language_model.model.decoder.layers.30.self_attn
+language_model.model.decoder.layers.30.self_attn.k_proj
+language_model.model.decoder.layers.30.self_attn.v_proj
+language_model.model.decoder.layers.30.self_attn.q_proj
+language_model.model.decoder.layers.30.self_attn.out_proj
+language_model.model.decoder.layers.30.activation_fn
+language_model.model.decoder.layers.30.self_attn_layer_norm
+language_model.model.decoder.layers.30.fc1
+language_model.model.decoder.layers.30.fc2
+language_model.model.decoder.layers.30.final_layer_norm
+language_model.model.decoder.layers.31
+language_model.model.decoder.layers.31.self_attn
+language_model.model.decoder.layers.31.self_attn.k_proj
+language_model.model.decoder.layers.31.self_attn.v_proj
+language_model.model.decoder.layers.31.self_attn.q_proj
+language_model.model.decoder.layers.31.self_attn.out_proj
+language_model.model.decoder.layers.31.activation_fn
+language_model.model.decoder.layers.31.self_attn_layer_norm
+language_model.model.decoder.layers.31.fc1
+language_model.model.decoder.layers.31.fc2
+language_model.model.decoder.layers.31.final_layer_norm
+language_model.lm_head
diff --git a/logs/THUDM/cogvlm-chat-hf.txt b/logs/THUDM/cogvlm-chat-hf.txt
new file mode 100644
index 0000000000000000000000000000000000000000..da896d9821c2fc4957d85a35df63c85d6ffeaeaf
--- /dev/null
+++ b/logs/THUDM/cogvlm-chat-hf.txt
@@ -0,0 +1,1352 @@
+
+model
+model.embed_tokens
+model.layers
+model.layers.0
+model.layers.0.self_attn
+model.layers.0.self_attn.rotary_emb
+model.layers.0.self_attn.vision_expert_query_key_value
+model.layers.0.self_attn.vision_expert_dense
+model.layers.0.self_attn.language_expert_query_key_value
+model.layers.0.self_attn.language_expert_dense
+model.layers.0.mlp
+model.layers.0.mlp.language_mlp
+model.layers.0.mlp.language_mlp.gate_proj
+model.layers.0.mlp.language_mlp.up_proj
+model.layers.0.mlp.language_mlp.down_proj
+model.layers.0.mlp.language_mlp.act_fn
+model.layers.0.mlp.vision_mlp
+model.layers.0.mlp.vision_mlp.gate_proj
+model.layers.0.mlp.vision_mlp.up_proj
+model.layers.0.mlp.vision_mlp.down_proj
+model.layers.0.mlp.vision_mlp.act_fn
+model.layers.0.input_layernorm
+model.layers.0.post_attention_layernorm
+model.layers.1
+model.layers.1.self_attn
+model.layers.1.self_attn.rotary_emb
+model.layers.1.self_attn.vision_expert_query_key_value
+model.layers.1.self_attn.vision_expert_dense
+model.layers.1.self_attn.language_expert_query_key_value
+model.layers.1.self_attn.language_expert_dense
+model.layers.1.mlp
+model.layers.1.mlp.language_mlp
+model.layers.1.mlp.language_mlp.gate_proj
+model.layers.1.mlp.language_mlp.up_proj
+model.layers.1.mlp.language_mlp.down_proj
+model.layers.1.mlp.language_mlp.act_fn
+model.layers.1.mlp.vision_mlp
+model.layers.1.mlp.vision_mlp.gate_proj
+model.layers.1.mlp.vision_mlp.up_proj
+model.layers.1.mlp.vision_mlp.down_proj
+model.layers.1.mlp.vision_mlp.act_fn
+model.layers.1.input_layernorm
+model.layers.1.post_attention_layernorm
+model.layers.2
+model.layers.2.self_attn
+model.layers.2.self_attn.rotary_emb
+model.layers.2.self_attn.vision_expert_query_key_value
+model.layers.2.self_attn.vision_expert_dense
+model.layers.2.self_attn.language_expert_query_key_value
+model.layers.2.self_attn.language_expert_dense
+model.layers.2.mlp
+model.layers.2.mlp.language_mlp
+model.layers.2.mlp.language_mlp.gate_proj
+model.layers.2.mlp.language_mlp.up_proj
+model.layers.2.mlp.language_mlp.down_proj
+model.layers.2.mlp.language_mlp.act_fn
+model.layers.2.mlp.vision_mlp
+model.layers.2.mlp.vision_mlp.gate_proj
+model.layers.2.mlp.vision_mlp.up_proj
+model.layers.2.mlp.vision_mlp.down_proj
+model.layers.2.mlp.vision_mlp.act_fn
+model.layers.2.input_layernorm
+model.layers.2.post_attention_layernorm
+model.layers.3
+model.layers.3.self_attn
+model.layers.3.self_attn.rotary_emb
+model.layers.3.self_attn.vision_expert_query_key_value
+model.layers.3.self_attn.vision_expert_dense
+model.layers.3.self_attn.language_expert_query_key_value
+model.layers.3.self_attn.language_expert_dense
+model.layers.3.mlp
+model.layers.3.mlp.language_mlp
+model.layers.3.mlp.language_mlp.gate_proj
+model.layers.3.mlp.language_mlp.up_proj
+model.layers.3.mlp.language_mlp.down_proj
+model.layers.3.mlp.language_mlp.act_fn
+model.layers.3.mlp.vision_mlp
+model.layers.3.mlp.vision_mlp.gate_proj
+model.layers.3.mlp.vision_mlp.up_proj
+model.layers.3.mlp.vision_mlp.down_proj
+model.layers.3.mlp.vision_mlp.act_fn
+model.layers.3.input_layernorm
+model.layers.3.post_attention_layernorm
+model.layers.4
+model.layers.4.self_attn
+model.layers.4.self_attn.rotary_emb
+model.layers.4.self_attn.vision_expert_query_key_value
+model.layers.4.self_attn.vision_expert_dense
+model.layers.4.self_attn.language_expert_query_key_value
+model.layers.4.self_attn.language_expert_dense
+model.layers.4.mlp
+model.layers.4.mlp.language_mlp
+model.layers.4.mlp.language_mlp.gate_proj
+model.layers.4.mlp.language_mlp.up_proj
+model.layers.4.mlp.language_mlp.down_proj
+model.layers.4.mlp.language_mlp.act_fn
+model.layers.4.mlp.vision_mlp
+model.layers.4.mlp.vision_mlp.gate_proj
+model.layers.4.mlp.vision_mlp.up_proj
+model.layers.4.mlp.vision_mlp.down_proj
+model.layers.4.mlp.vision_mlp.act_fn
+model.layers.4.input_layernorm
+model.layers.4.post_attention_layernorm
+model.layers.5
+model.layers.5.self_attn
+model.layers.5.self_attn.rotary_emb
+model.layers.5.self_attn.vision_expert_query_key_value
+model.layers.5.self_attn.vision_expert_dense
+model.layers.5.self_attn.language_expert_query_key_value
+model.layers.5.self_attn.language_expert_dense
+model.layers.5.mlp
+model.layers.5.mlp.language_mlp
+model.layers.5.mlp.language_mlp.gate_proj
+model.layers.5.mlp.language_mlp.up_proj
+model.layers.5.mlp.language_mlp.down_proj
+model.layers.5.mlp.language_mlp.act_fn
+model.layers.5.mlp.vision_mlp
+model.layers.5.mlp.vision_mlp.gate_proj
+model.layers.5.mlp.vision_mlp.up_proj
+model.layers.5.mlp.vision_mlp.down_proj
+model.layers.5.mlp.vision_mlp.act_fn
+model.layers.5.input_layernorm
+model.layers.5.post_attention_layernorm
+model.layers.6
+model.layers.6.self_attn
+model.layers.6.self_attn.rotary_emb
+model.layers.6.self_attn.vision_expert_query_key_value
+model.layers.6.self_attn.vision_expert_dense
+model.layers.6.self_attn.language_expert_query_key_value
+model.layers.6.self_attn.language_expert_dense
+model.layers.6.mlp
+model.layers.6.mlp.language_mlp
+model.layers.6.mlp.language_mlp.gate_proj
+model.layers.6.mlp.language_mlp.up_proj
+model.layers.6.mlp.language_mlp.down_proj
+model.layers.6.mlp.language_mlp.act_fn
+model.layers.6.mlp.vision_mlp
+model.layers.6.mlp.vision_mlp.gate_proj
+model.layers.6.mlp.vision_mlp.up_proj
+model.layers.6.mlp.vision_mlp.down_proj
+model.layers.6.mlp.vision_mlp.act_fn
+model.layers.6.input_layernorm
+model.layers.6.post_attention_layernorm
+model.layers.7
+model.layers.7.self_attn
+model.layers.7.self_attn.rotary_emb
+model.layers.7.self_attn.vision_expert_query_key_value
+model.layers.7.self_attn.vision_expert_dense
+model.layers.7.self_attn.language_expert_query_key_value
+model.layers.7.self_attn.language_expert_dense
+model.layers.7.mlp
+model.layers.7.mlp.language_mlp
+model.layers.7.mlp.language_mlp.gate_proj
+model.layers.7.mlp.language_mlp.up_proj
+model.layers.7.mlp.language_mlp.down_proj
+model.layers.7.mlp.language_mlp.act_fn
+model.layers.7.mlp.vision_mlp
+model.layers.7.mlp.vision_mlp.gate_proj
+model.layers.7.mlp.vision_mlp.up_proj
+model.layers.7.mlp.vision_mlp.down_proj
+model.layers.7.mlp.vision_mlp.act_fn
+model.layers.7.input_layernorm
+model.layers.7.post_attention_layernorm
+model.layers.8
+model.layers.8.self_attn
+model.layers.8.self_attn.rotary_emb
+model.layers.8.self_attn.vision_expert_query_key_value
+model.layers.8.self_attn.vision_expert_dense
+model.layers.8.self_attn.language_expert_query_key_value
+model.layers.8.self_attn.language_expert_dense
+model.layers.8.mlp
+model.layers.8.mlp.language_mlp
+model.layers.8.mlp.language_mlp.gate_proj
+model.layers.8.mlp.language_mlp.up_proj
+model.layers.8.mlp.language_mlp.down_proj
+model.layers.8.mlp.language_mlp.act_fn
+model.layers.8.mlp.vision_mlp
+model.layers.8.mlp.vision_mlp.gate_proj
+model.layers.8.mlp.vision_mlp.up_proj
+model.layers.8.mlp.vision_mlp.down_proj
+model.layers.8.mlp.vision_mlp.act_fn
+model.layers.8.input_layernorm
+model.layers.8.post_attention_layernorm
+model.layers.9
+model.layers.9.self_attn
+model.layers.9.self_attn.rotary_emb
+model.layers.9.self_attn.vision_expert_query_key_value
+model.layers.9.self_attn.vision_expert_dense
+model.layers.9.self_attn.language_expert_query_key_value
+model.layers.9.self_attn.language_expert_dense
+model.layers.9.mlp
+model.layers.9.mlp.language_mlp
+model.layers.9.mlp.language_mlp.gate_proj
+model.layers.9.mlp.language_mlp.up_proj
+model.layers.9.mlp.language_mlp.down_proj
+model.layers.9.mlp.language_mlp.act_fn
+model.layers.9.mlp.vision_mlp
+model.layers.9.mlp.vision_mlp.gate_proj
+model.layers.9.mlp.vision_mlp.up_proj
+model.layers.9.mlp.vision_mlp.down_proj
+model.layers.9.mlp.vision_mlp.act_fn
+model.layers.9.input_layernorm
+model.layers.9.post_attention_layernorm
+model.layers.10
+model.layers.10.self_attn
+model.layers.10.self_attn.rotary_emb
+model.layers.10.self_attn.vision_expert_query_key_value
+model.layers.10.self_attn.vision_expert_dense
+model.layers.10.self_attn.language_expert_query_key_value
+model.layers.10.self_attn.language_expert_dense
+model.layers.10.mlp
+model.layers.10.mlp.language_mlp
+model.layers.10.mlp.language_mlp.gate_proj
+model.layers.10.mlp.language_mlp.up_proj
+model.layers.10.mlp.language_mlp.down_proj
+model.layers.10.mlp.language_mlp.act_fn
+model.layers.10.mlp.vision_mlp
+model.layers.10.mlp.vision_mlp.gate_proj
+model.layers.10.mlp.vision_mlp.up_proj
+model.layers.10.mlp.vision_mlp.down_proj
+model.layers.10.mlp.vision_mlp.act_fn
+model.layers.10.input_layernorm
+model.layers.10.post_attention_layernorm
+model.layers.11
+model.layers.11.self_attn
+model.layers.11.self_attn.rotary_emb
+model.layers.11.self_attn.vision_expert_query_key_value
+model.layers.11.self_attn.vision_expert_dense
+model.layers.11.self_attn.language_expert_query_key_value
+model.layers.11.self_attn.language_expert_dense
+model.layers.11.mlp
+model.layers.11.mlp.language_mlp
+model.layers.11.mlp.language_mlp.gate_proj
+model.layers.11.mlp.language_mlp.up_proj
+model.layers.11.mlp.language_mlp.down_proj
+model.layers.11.mlp.language_mlp.act_fn
+model.layers.11.mlp.vision_mlp
+model.layers.11.mlp.vision_mlp.gate_proj
+model.layers.11.mlp.vision_mlp.up_proj
+model.layers.11.mlp.vision_mlp.down_proj
+model.layers.11.mlp.vision_mlp.act_fn
+model.layers.11.input_layernorm
+model.layers.11.post_attention_layernorm
+model.layers.12
+model.layers.12.self_attn
+model.layers.12.self_attn.rotary_emb
+model.layers.12.self_attn.vision_expert_query_key_value
+model.layers.12.self_attn.vision_expert_dense
+model.layers.12.self_attn.language_expert_query_key_value
+model.layers.12.self_attn.language_expert_dense
+model.layers.12.mlp
+model.layers.12.mlp.language_mlp
+model.layers.12.mlp.language_mlp.gate_proj
+model.layers.12.mlp.language_mlp.up_proj
+model.layers.12.mlp.language_mlp.down_proj
+model.layers.12.mlp.language_mlp.act_fn
+model.layers.12.mlp.vision_mlp
+model.layers.12.mlp.vision_mlp.gate_proj
+model.layers.12.mlp.vision_mlp.up_proj
+model.layers.12.mlp.vision_mlp.down_proj
+model.layers.12.mlp.vision_mlp.act_fn
+model.layers.12.input_layernorm
+model.layers.12.post_attention_layernorm
+model.layers.13
+model.layers.13.self_attn
+model.layers.13.self_attn.rotary_emb
+model.layers.13.self_attn.vision_expert_query_key_value
+model.layers.13.self_attn.vision_expert_dense
+model.layers.13.self_attn.language_expert_query_key_value
+model.layers.13.self_attn.language_expert_dense
+model.layers.13.mlp
+model.layers.13.mlp.language_mlp
+model.layers.13.mlp.language_mlp.gate_proj
+model.layers.13.mlp.language_mlp.up_proj
+model.layers.13.mlp.language_mlp.down_proj
+model.layers.13.mlp.language_mlp.act_fn
+model.layers.13.mlp.vision_mlp
+model.layers.13.mlp.vision_mlp.gate_proj
+model.layers.13.mlp.vision_mlp.up_proj
+model.layers.13.mlp.vision_mlp.down_proj
+model.layers.13.mlp.vision_mlp.act_fn
+model.layers.13.input_layernorm
+model.layers.13.post_attention_layernorm
+model.layers.14
+model.layers.14.self_attn
+model.layers.14.self_attn.rotary_emb
+model.layers.14.self_attn.vision_expert_query_key_value
+model.layers.14.self_attn.vision_expert_dense
+model.layers.14.self_attn.language_expert_query_key_value
+model.layers.14.self_attn.language_expert_dense
+model.layers.14.mlp
+model.layers.14.mlp.language_mlp
+model.layers.14.mlp.language_mlp.gate_proj
+model.layers.14.mlp.language_mlp.up_proj
+model.layers.14.mlp.language_mlp.down_proj
+model.layers.14.mlp.language_mlp.act_fn
+model.layers.14.mlp.vision_mlp
+model.layers.14.mlp.vision_mlp.gate_proj
+model.layers.14.mlp.vision_mlp.up_proj
+model.layers.14.mlp.vision_mlp.down_proj
+model.layers.14.mlp.vision_mlp.act_fn
+model.layers.14.input_layernorm
+model.layers.14.post_attention_layernorm
+model.layers.15
+model.layers.15.self_attn
+model.layers.15.self_attn.rotary_emb
+model.layers.15.self_attn.vision_expert_query_key_value
+model.layers.15.self_attn.vision_expert_dense
+model.layers.15.self_attn.language_expert_query_key_value
+model.layers.15.self_attn.language_expert_dense
+model.layers.15.mlp
+model.layers.15.mlp.language_mlp
+model.layers.15.mlp.language_mlp.gate_proj
+model.layers.15.mlp.language_mlp.up_proj
+model.layers.15.mlp.language_mlp.down_proj
+model.layers.15.mlp.language_mlp.act_fn
+model.layers.15.mlp.vision_mlp
+model.layers.15.mlp.vision_mlp.gate_proj
+model.layers.15.mlp.vision_mlp.up_proj
+model.layers.15.mlp.vision_mlp.down_proj
+model.layers.15.mlp.vision_mlp.act_fn
+model.layers.15.input_layernorm
+model.layers.15.post_attention_layernorm
+model.layers.16
+model.layers.16.self_attn
+model.layers.16.self_attn.rotary_emb
+model.layers.16.self_attn.vision_expert_query_key_value
+model.layers.16.self_attn.vision_expert_dense
+model.layers.16.self_attn.language_expert_query_key_value
+model.layers.16.self_attn.language_expert_dense
+model.layers.16.mlp
+model.layers.16.mlp.language_mlp
+model.layers.16.mlp.language_mlp.gate_proj
+model.layers.16.mlp.language_mlp.up_proj
+model.layers.16.mlp.language_mlp.down_proj
+model.layers.16.mlp.language_mlp.act_fn
+model.layers.16.mlp.vision_mlp
+model.layers.16.mlp.vision_mlp.gate_proj
+model.layers.16.mlp.vision_mlp.up_proj
+model.layers.16.mlp.vision_mlp.down_proj
+model.layers.16.mlp.vision_mlp.act_fn
+model.layers.16.input_layernorm
+model.layers.16.post_attention_layernorm
+model.layers.17
+model.layers.17.self_attn
+model.layers.17.self_attn.rotary_emb
+model.layers.17.self_attn.vision_expert_query_key_value
+model.layers.17.self_attn.vision_expert_dense
+model.layers.17.self_attn.language_expert_query_key_value
+model.layers.17.self_attn.language_expert_dense
+model.layers.17.mlp
+model.layers.17.mlp.language_mlp
+model.layers.17.mlp.language_mlp.gate_proj
+model.layers.17.mlp.language_mlp.up_proj
+model.layers.17.mlp.language_mlp.down_proj
+model.layers.17.mlp.language_mlp.act_fn
+model.layers.17.mlp.vision_mlp
+model.layers.17.mlp.vision_mlp.gate_proj
+model.layers.17.mlp.vision_mlp.up_proj
+model.layers.17.mlp.vision_mlp.down_proj
+model.layers.17.mlp.vision_mlp.act_fn
+model.layers.17.input_layernorm
+model.layers.17.post_attention_layernorm
+model.layers.18
+model.layers.18.self_attn
+model.layers.18.self_attn.rotary_emb
+model.layers.18.self_attn.vision_expert_query_key_value
+model.layers.18.self_attn.vision_expert_dense
+model.layers.18.self_attn.language_expert_query_key_value
+model.layers.18.self_attn.language_expert_dense
+model.layers.18.mlp
+model.layers.18.mlp.language_mlp
+model.layers.18.mlp.language_mlp.gate_proj
+model.layers.18.mlp.language_mlp.up_proj
+model.layers.18.mlp.language_mlp.down_proj
+model.layers.18.mlp.language_mlp.act_fn
+model.layers.18.mlp.vision_mlp
+model.layers.18.mlp.vision_mlp.gate_proj
+model.layers.18.mlp.vision_mlp.up_proj
+model.layers.18.mlp.vision_mlp.down_proj
+model.layers.18.mlp.vision_mlp.act_fn
+model.layers.18.input_layernorm
+model.layers.18.post_attention_layernorm
+model.layers.19
+model.layers.19.self_attn
+model.layers.19.self_attn.rotary_emb
+model.layers.19.self_attn.vision_expert_query_key_value
+model.layers.19.self_attn.vision_expert_dense
+model.layers.19.self_attn.language_expert_query_key_value
+model.layers.19.self_attn.language_expert_dense
+model.layers.19.mlp
+model.layers.19.mlp.language_mlp
+model.layers.19.mlp.language_mlp.gate_proj
+model.layers.19.mlp.language_mlp.up_proj
+model.layers.19.mlp.language_mlp.down_proj
+model.layers.19.mlp.language_mlp.act_fn
+model.layers.19.mlp.vision_mlp
+model.layers.19.mlp.vision_mlp.gate_proj
+model.layers.19.mlp.vision_mlp.up_proj
+model.layers.19.mlp.vision_mlp.down_proj
+model.layers.19.mlp.vision_mlp.act_fn
+model.layers.19.input_layernorm
+model.layers.19.post_attention_layernorm
+model.layers.20
+model.layers.20.self_attn
+model.layers.20.self_attn.rotary_emb
+model.layers.20.self_attn.vision_expert_query_key_value
+model.layers.20.self_attn.vision_expert_dense
+model.layers.20.self_attn.language_expert_query_key_value
+model.layers.20.self_attn.language_expert_dense
+model.layers.20.mlp
+model.layers.20.mlp.language_mlp
+model.layers.20.mlp.language_mlp.gate_proj
+model.layers.20.mlp.language_mlp.up_proj
+model.layers.20.mlp.language_mlp.down_proj
+model.layers.20.mlp.language_mlp.act_fn
+model.layers.20.mlp.vision_mlp
+model.layers.20.mlp.vision_mlp.gate_proj
+model.layers.20.mlp.vision_mlp.up_proj
+model.layers.20.mlp.vision_mlp.down_proj
+model.layers.20.mlp.vision_mlp.act_fn
+model.layers.20.input_layernorm
+model.layers.20.post_attention_layernorm
+model.layers.21
+model.layers.21.self_attn
+model.layers.21.self_attn.rotary_emb
+model.layers.21.self_attn.vision_expert_query_key_value
+model.layers.21.self_attn.vision_expert_dense
+model.layers.21.self_attn.language_expert_query_key_value
+model.layers.21.self_attn.language_expert_dense
+model.layers.21.mlp
+model.layers.21.mlp.language_mlp
+model.layers.21.mlp.language_mlp.gate_proj
+model.layers.21.mlp.language_mlp.up_proj
+model.layers.21.mlp.language_mlp.down_proj
+model.layers.21.mlp.language_mlp.act_fn
+model.layers.21.mlp.vision_mlp
+model.layers.21.mlp.vision_mlp.gate_proj
+model.layers.21.mlp.vision_mlp.up_proj
+model.layers.21.mlp.vision_mlp.down_proj
+model.layers.21.mlp.vision_mlp.act_fn
+model.layers.21.input_layernorm
+model.layers.21.post_attention_layernorm
+model.layers.22
+model.layers.22.self_attn
+model.layers.22.self_attn.rotary_emb
+model.layers.22.self_attn.vision_expert_query_key_value
+model.layers.22.self_attn.vision_expert_dense
+model.layers.22.self_attn.language_expert_query_key_value
+model.layers.22.self_attn.language_expert_dense
+model.layers.22.mlp
+model.layers.22.mlp.language_mlp
+model.layers.22.mlp.language_mlp.gate_proj
+model.layers.22.mlp.language_mlp.up_proj
+model.layers.22.mlp.language_mlp.down_proj
+model.layers.22.mlp.language_mlp.act_fn
+model.layers.22.mlp.vision_mlp
+model.layers.22.mlp.vision_mlp.gate_proj
+model.layers.22.mlp.vision_mlp.up_proj
+model.layers.22.mlp.vision_mlp.down_proj
+model.layers.22.mlp.vision_mlp.act_fn
+model.layers.22.input_layernorm
+model.layers.22.post_attention_layernorm
+model.layers.23
+model.layers.23.self_attn
+model.layers.23.self_attn.rotary_emb
+model.layers.23.self_attn.vision_expert_query_key_value
+model.layers.23.self_attn.vision_expert_dense
+model.layers.23.self_attn.language_expert_query_key_value
+model.layers.23.self_attn.language_expert_dense
+model.layers.23.mlp
+model.layers.23.mlp.language_mlp
+model.layers.23.mlp.language_mlp.gate_proj
+model.layers.23.mlp.language_mlp.up_proj
+model.layers.23.mlp.language_mlp.down_proj
+model.layers.23.mlp.language_mlp.act_fn
+model.layers.23.mlp.vision_mlp
+model.layers.23.mlp.vision_mlp.gate_proj
+model.layers.23.mlp.vision_mlp.up_proj
+model.layers.23.mlp.vision_mlp.down_proj
+model.layers.23.mlp.vision_mlp.act_fn
+model.layers.23.input_layernorm
+model.layers.23.post_attention_layernorm
+model.layers.24
+model.layers.24.self_attn
+model.layers.24.self_attn.rotary_emb
+model.layers.24.self_attn.vision_expert_query_key_value
+model.layers.24.self_attn.vision_expert_dense
+model.layers.24.self_attn.language_expert_query_key_value
+model.layers.24.self_attn.language_expert_dense
+model.layers.24.mlp
+model.layers.24.mlp.language_mlp
+model.layers.24.mlp.language_mlp.gate_proj
+model.layers.24.mlp.language_mlp.up_proj
+model.layers.24.mlp.language_mlp.down_proj
+model.layers.24.mlp.language_mlp.act_fn
+model.layers.24.mlp.vision_mlp
+model.layers.24.mlp.vision_mlp.gate_proj
+model.layers.24.mlp.vision_mlp.up_proj
+model.layers.24.mlp.vision_mlp.down_proj
+model.layers.24.mlp.vision_mlp.act_fn
+model.layers.24.input_layernorm
+model.layers.24.post_attention_layernorm
+model.layers.25
+model.layers.25.self_attn
+model.layers.25.self_attn.rotary_emb
+model.layers.25.self_attn.vision_expert_query_key_value
+model.layers.25.self_attn.vision_expert_dense
+model.layers.25.self_attn.language_expert_query_key_value
+model.layers.25.self_attn.language_expert_dense
+model.layers.25.mlp
+model.layers.25.mlp.language_mlp
+model.layers.25.mlp.language_mlp.gate_proj
+model.layers.25.mlp.language_mlp.up_proj
+model.layers.25.mlp.language_mlp.down_proj
+model.layers.25.mlp.language_mlp.act_fn
+model.layers.25.mlp.vision_mlp
+model.layers.25.mlp.vision_mlp.gate_proj
+model.layers.25.mlp.vision_mlp.up_proj
+model.layers.25.mlp.vision_mlp.down_proj
+model.layers.25.mlp.vision_mlp.act_fn
+model.layers.25.input_layernorm
+model.layers.25.post_attention_layernorm
+model.layers.26
+model.layers.26.self_attn
+model.layers.26.self_attn.rotary_emb
+model.layers.26.self_attn.vision_expert_query_key_value
+model.layers.26.self_attn.vision_expert_dense
+model.layers.26.self_attn.language_expert_query_key_value
+model.layers.26.self_attn.language_expert_dense
+model.layers.26.mlp
+model.layers.26.mlp.language_mlp
+model.layers.26.mlp.language_mlp.gate_proj
+model.layers.26.mlp.language_mlp.up_proj
+model.layers.26.mlp.language_mlp.down_proj
+model.layers.26.mlp.language_mlp.act_fn
+model.layers.26.mlp.vision_mlp
+model.layers.26.mlp.vision_mlp.gate_proj
+model.layers.26.mlp.vision_mlp.up_proj
+model.layers.26.mlp.vision_mlp.down_proj
+model.layers.26.mlp.vision_mlp.act_fn
+model.layers.26.input_layernorm
+model.layers.26.post_attention_layernorm
+model.layers.27
+model.layers.27.self_attn
+model.layers.27.self_attn.rotary_emb
+model.layers.27.self_attn.vision_expert_query_key_value
+model.layers.27.self_attn.vision_expert_dense
+model.layers.27.self_attn.language_expert_query_key_value
+model.layers.27.self_attn.language_expert_dense
+model.layers.27.mlp
+model.layers.27.mlp.language_mlp
+model.layers.27.mlp.language_mlp.gate_proj
+model.layers.27.mlp.language_mlp.up_proj
+model.layers.27.mlp.language_mlp.down_proj
+model.layers.27.mlp.language_mlp.act_fn
+model.layers.27.mlp.vision_mlp
+model.layers.27.mlp.vision_mlp.gate_proj
+model.layers.27.mlp.vision_mlp.up_proj
+model.layers.27.mlp.vision_mlp.down_proj
+model.layers.27.mlp.vision_mlp.act_fn
+model.layers.27.input_layernorm
+model.layers.27.post_attention_layernorm
+model.layers.28
+model.layers.28.self_attn
+model.layers.28.self_attn.rotary_emb
+model.layers.28.self_attn.vision_expert_query_key_value
+model.layers.28.self_attn.vision_expert_dense
+model.layers.28.self_attn.language_expert_query_key_value
+model.layers.28.self_attn.language_expert_dense
+model.layers.28.mlp
+model.layers.28.mlp.language_mlp
+model.layers.28.mlp.language_mlp.gate_proj
+model.layers.28.mlp.language_mlp.up_proj
+model.layers.28.mlp.language_mlp.down_proj
+model.layers.28.mlp.language_mlp.act_fn
+model.layers.28.mlp.vision_mlp
+model.layers.28.mlp.vision_mlp.gate_proj
+model.layers.28.mlp.vision_mlp.up_proj
+model.layers.28.mlp.vision_mlp.down_proj
+model.layers.28.mlp.vision_mlp.act_fn
+model.layers.28.input_layernorm
+model.layers.28.post_attention_layernorm
+model.layers.29
+model.layers.29.self_attn
+model.layers.29.self_attn.rotary_emb
+model.layers.29.self_attn.vision_expert_query_key_value
+model.layers.29.self_attn.vision_expert_dense
+model.layers.29.self_attn.language_expert_query_key_value
+model.layers.29.self_attn.language_expert_dense
+model.layers.29.mlp
+model.layers.29.mlp.language_mlp
+model.layers.29.mlp.language_mlp.gate_proj
+model.layers.29.mlp.language_mlp.up_proj
+model.layers.29.mlp.language_mlp.down_proj
+model.layers.29.mlp.language_mlp.act_fn
+model.layers.29.mlp.vision_mlp
+model.layers.29.mlp.vision_mlp.gate_proj
+model.layers.29.mlp.vision_mlp.up_proj
+model.layers.29.mlp.vision_mlp.down_proj
+model.layers.29.mlp.vision_mlp.act_fn
+model.layers.29.input_layernorm
+model.layers.29.post_attention_layernorm
+model.layers.30
+model.layers.30.self_attn
+model.layers.30.self_attn.rotary_emb
+model.layers.30.self_attn.vision_expert_query_key_value
+model.layers.30.self_attn.vision_expert_dense
+model.layers.30.self_attn.language_expert_query_key_value
+model.layers.30.self_attn.language_expert_dense
+model.layers.30.mlp
+model.layers.30.mlp.language_mlp
+model.layers.30.mlp.language_mlp.gate_proj
+model.layers.30.mlp.language_mlp.up_proj
+model.layers.30.mlp.language_mlp.down_proj
+model.layers.30.mlp.language_mlp.act_fn
+model.layers.30.mlp.vision_mlp
+model.layers.30.mlp.vision_mlp.gate_proj
+model.layers.30.mlp.vision_mlp.up_proj
+model.layers.30.mlp.vision_mlp.down_proj
+model.layers.30.mlp.vision_mlp.act_fn
+model.layers.30.input_layernorm
+model.layers.30.post_attention_layernorm
+model.layers.31
+model.layers.31.self_attn
+model.layers.31.self_attn.rotary_emb
+model.layers.31.self_attn.vision_expert_query_key_value
+model.layers.31.self_attn.vision_expert_dense
+model.layers.31.self_attn.language_expert_query_key_value
+model.layers.31.self_attn.language_expert_dense
+model.layers.31.mlp
+model.layers.31.mlp.language_mlp
+model.layers.31.mlp.language_mlp.gate_proj
+model.layers.31.mlp.language_mlp.up_proj
+model.layers.31.mlp.language_mlp.down_proj
+model.layers.31.mlp.language_mlp.act_fn
+model.layers.31.mlp.vision_mlp
+model.layers.31.mlp.vision_mlp.gate_proj
+model.layers.31.mlp.vision_mlp.up_proj
+model.layers.31.mlp.vision_mlp.down_proj
+model.layers.31.mlp.vision_mlp.act_fn
+model.layers.31.input_layernorm
+model.layers.31.post_attention_layernorm
+model.norm
+model.vision
+model.vision.patch_embedding
+model.vision.patch_embedding.proj
+model.vision.patch_embedding.position_embedding
+model.vision.transformer
+model.vision.transformer.layers
+model.vision.transformer.layers.0
+model.vision.transformer.layers.0.input_layernorm
+model.vision.transformer.layers.0.attention
+model.vision.transformer.layers.0.attention.query_key_value
+model.vision.transformer.layers.0.attention.dense
+model.vision.transformer.layers.0.attention.output_dropout
+model.vision.transformer.layers.0.mlp
+model.vision.transformer.layers.0.mlp.activation_fn
+model.vision.transformer.layers.0.mlp.fc1
+model.vision.transformer.layers.0.mlp.fc2
+model.vision.transformer.layers.0.post_attention_layernorm
+model.vision.transformer.layers.1
+model.vision.transformer.layers.1.input_layernorm
+model.vision.transformer.layers.1.attention
+model.vision.transformer.layers.1.attention.query_key_value
+model.vision.transformer.layers.1.attention.dense
+model.vision.transformer.layers.1.attention.output_dropout
+model.vision.transformer.layers.1.mlp
+model.vision.transformer.layers.1.mlp.activation_fn
+model.vision.transformer.layers.1.mlp.fc1
+model.vision.transformer.layers.1.mlp.fc2
+model.vision.transformer.layers.1.post_attention_layernorm
+model.vision.transformer.layers.2
+model.vision.transformer.layers.2.input_layernorm
+model.vision.transformer.layers.2.attention
+model.vision.transformer.layers.2.attention.query_key_value
+model.vision.transformer.layers.2.attention.dense
+model.vision.transformer.layers.2.attention.output_dropout
+model.vision.transformer.layers.2.mlp
+model.vision.transformer.layers.2.mlp.activation_fn
+model.vision.transformer.layers.2.mlp.fc1
+model.vision.transformer.layers.2.mlp.fc2
+model.vision.transformer.layers.2.post_attention_layernorm
+model.vision.transformer.layers.3
+model.vision.transformer.layers.3.input_layernorm
+model.vision.transformer.layers.3.attention
+model.vision.transformer.layers.3.attention.query_key_value
+model.vision.transformer.layers.3.attention.dense
+model.vision.transformer.layers.3.attention.output_dropout
+model.vision.transformer.layers.3.mlp
+model.vision.transformer.layers.3.mlp.activation_fn
+model.vision.transformer.layers.3.mlp.fc1
+model.vision.transformer.layers.3.mlp.fc2
+model.vision.transformer.layers.3.post_attention_layernorm
+model.vision.transformer.layers.4
+model.vision.transformer.layers.4.input_layernorm
+model.vision.transformer.layers.4.attention
+model.vision.transformer.layers.4.attention.query_key_value
+model.vision.transformer.layers.4.attention.dense
+model.vision.transformer.layers.4.attention.output_dropout
+model.vision.transformer.layers.4.mlp
+model.vision.transformer.layers.4.mlp.activation_fn
+model.vision.transformer.layers.4.mlp.fc1
+model.vision.transformer.layers.4.mlp.fc2
+model.vision.transformer.layers.4.post_attention_layernorm
+model.vision.transformer.layers.5
+model.vision.transformer.layers.5.input_layernorm
+model.vision.transformer.layers.5.attention
+model.vision.transformer.layers.5.attention.query_key_value
+model.vision.transformer.layers.5.attention.dense
+model.vision.transformer.layers.5.attention.output_dropout
+model.vision.transformer.layers.5.mlp
+model.vision.transformer.layers.5.mlp.activation_fn
+model.vision.transformer.layers.5.mlp.fc1
+model.vision.transformer.layers.5.mlp.fc2
+model.vision.transformer.layers.5.post_attention_layernorm
+model.vision.transformer.layers.6
+model.vision.transformer.layers.6.input_layernorm
+model.vision.transformer.layers.6.attention
+model.vision.transformer.layers.6.attention.query_key_value
+model.vision.transformer.layers.6.attention.dense
+model.vision.transformer.layers.6.attention.output_dropout
+model.vision.transformer.layers.6.mlp
+model.vision.transformer.layers.6.mlp.activation_fn
+model.vision.transformer.layers.6.mlp.fc1
+model.vision.transformer.layers.6.mlp.fc2
+model.vision.transformer.layers.6.post_attention_layernorm
+model.vision.transformer.layers.7
+model.vision.transformer.layers.7.input_layernorm
+model.vision.transformer.layers.7.attention
+model.vision.transformer.layers.7.attention.query_key_value
+model.vision.transformer.layers.7.attention.dense
+model.vision.transformer.layers.7.attention.output_dropout
+model.vision.transformer.layers.7.mlp
+model.vision.transformer.layers.7.mlp.activation_fn
+model.vision.transformer.layers.7.mlp.fc1
+model.vision.transformer.layers.7.mlp.fc2
+model.vision.transformer.layers.7.post_attention_layernorm
+model.vision.transformer.layers.8
+model.vision.transformer.layers.8.input_layernorm
+model.vision.transformer.layers.8.attention
+model.vision.transformer.layers.8.attention.query_key_value
+model.vision.transformer.layers.8.attention.dense
+model.vision.transformer.layers.8.attention.output_dropout
+model.vision.transformer.layers.8.mlp
+model.vision.transformer.layers.8.mlp.activation_fn
+model.vision.transformer.layers.8.mlp.fc1
+model.vision.transformer.layers.8.mlp.fc2
+model.vision.transformer.layers.8.post_attention_layernorm
+model.vision.transformer.layers.9
+model.vision.transformer.layers.9.input_layernorm
+model.vision.transformer.layers.9.attention
+model.vision.transformer.layers.9.attention.query_key_value
+model.vision.transformer.layers.9.attention.dense
+model.vision.transformer.layers.9.attention.output_dropout
+model.vision.transformer.layers.9.mlp
+model.vision.transformer.layers.9.mlp.activation_fn
+model.vision.transformer.layers.9.mlp.fc1
+model.vision.transformer.layers.9.mlp.fc2
+model.vision.transformer.layers.9.post_attention_layernorm
+model.vision.transformer.layers.10
+model.vision.transformer.layers.10.input_layernorm
+model.vision.transformer.layers.10.attention
+model.vision.transformer.layers.10.attention.query_key_value
+model.vision.transformer.layers.10.attention.dense
+model.vision.transformer.layers.10.attention.output_dropout
+model.vision.transformer.layers.10.mlp
+model.vision.transformer.layers.10.mlp.activation_fn
+model.vision.transformer.layers.10.mlp.fc1
+model.vision.transformer.layers.10.mlp.fc2
+model.vision.transformer.layers.10.post_attention_layernorm
+model.vision.transformer.layers.11
+model.vision.transformer.layers.11.input_layernorm
+model.vision.transformer.layers.11.attention
+model.vision.transformer.layers.11.attention.query_key_value
+model.vision.transformer.layers.11.attention.dense
+model.vision.transformer.layers.11.attention.output_dropout
+model.vision.transformer.layers.11.mlp
+model.vision.transformer.layers.11.mlp.activation_fn
+model.vision.transformer.layers.11.mlp.fc1
+model.vision.transformer.layers.11.mlp.fc2
+model.vision.transformer.layers.11.post_attention_layernorm
+model.vision.transformer.layers.12
+model.vision.transformer.layers.12.input_layernorm
+model.vision.transformer.layers.12.attention
+model.vision.transformer.layers.12.attention.query_key_value
+model.vision.transformer.layers.12.attention.dense
+model.vision.transformer.layers.12.attention.output_dropout
+model.vision.transformer.layers.12.mlp
+model.vision.transformer.layers.12.mlp.activation_fn
+model.vision.transformer.layers.12.mlp.fc1
+model.vision.transformer.layers.12.mlp.fc2
+model.vision.transformer.layers.12.post_attention_layernorm
+model.vision.transformer.layers.13
+model.vision.transformer.layers.13.input_layernorm
+model.vision.transformer.layers.13.attention
+model.vision.transformer.layers.13.attention.query_key_value
+model.vision.transformer.layers.13.attention.dense
+model.vision.transformer.layers.13.attention.output_dropout
+model.vision.transformer.layers.13.mlp
+model.vision.transformer.layers.13.mlp.activation_fn
+model.vision.transformer.layers.13.mlp.fc1
+model.vision.transformer.layers.13.mlp.fc2
+model.vision.transformer.layers.13.post_attention_layernorm
+model.vision.transformer.layers.14
+model.vision.transformer.layers.14.input_layernorm
+model.vision.transformer.layers.14.attention
+model.vision.transformer.layers.14.attention.query_key_value
+model.vision.transformer.layers.14.attention.dense
+model.vision.transformer.layers.14.attention.output_dropout
+model.vision.transformer.layers.14.mlp
+model.vision.transformer.layers.14.mlp.activation_fn
+model.vision.transformer.layers.14.mlp.fc1
+model.vision.transformer.layers.14.mlp.fc2
+model.vision.transformer.layers.14.post_attention_layernorm
+model.vision.transformer.layers.15
+model.vision.transformer.layers.15.input_layernorm
+model.vision.transformer.layers.15.attention
+model.vision.transformer.layers.15.attention.query_key_value
+model.vision.transformer.layers.15.attention.dense
+model.vision.transformer.layers.15.attention.output_dropout
+model.vision.transformer.layers.15.mlp
+model.vision.transformer.layers.15.mlp.activation_fn
+model.vision.transformer.layers.15.mlp.fc1
+model.vision.transformer.layers.15.mlp.fc2
+model.vision.transformer.layers.15.post_attention_layernorm
+model.vision.transformer.layers.16
+model.vision.transformer.layers.16.input_layernorm
+model.vision.transformer.layers.16.attention
+model.vision.transformer.layers.16.attention.query_key_value
+model.vision.transformer.layers.16.attention.dense
+model.vision.transformer.layers.16.attention.output_dropout
+model.vision.transformer.layers.16.mlp
+model.vision.transformer.layers.16.mlp.activation_fn
+model.vision.transformer.layers.16.mlp.fc1
+model.vision.transformer.layers.16.mlp.fc2
+model.vision.transformer.layers.16.post_attention_layernorm
+model.vision.transformer.layers.17
+model.vision.transformer.layers.17.input_layernorm
+model.vision.transformer.layers.17.attention
+model.vision.transformer.layers.17.attention.query_key_value
+model.vision.transformer.layers.17.attention.dense
+model.vision.transformer.layers.17.attention.output_dropout
+model.vision.transformer.layers.17.mlp
+model.vision.transformer.layers.17.mlp.activation_fn
+model.vision.transformer.layers.17.mlp.fc1
+model.vision.transformer.layers.17.mlp.fc2
+model.vision.transformer.layers.17.post_attention_layernorm
+model.vision.transformer.layers.18
+model.vision.transformer.layers.18.input_layernorm
+model.vision.transformer.layers.18.attention
+model.vision.transformer.layers.18.attention.query_key_value
+model.vision.transformer.layers.18.attention.dense
+model.vision.transformer.layers.18.attention.output_dropout
+model.vision.transformer.layers.18.mlp
+model.vision.transformer.layers.18.mlp.activation_fn
+model.vision.transformer.layers.18.mlp.fc1
+model.vision.transformer.layers.18.mlp.fc2
+model.vision.transformer.layers.18.post_attention_layernorm
+model.vision.transformer.layers.19
+model.vision.transformer.layers.19.input_layernorm
+model.vision.transformer.layers.19.attention
+model.vision.transformer.layers.19.attention.query_key_value
+model.vision.transformer.layers.19.attention.dense
+model.vision.transformer.layers.19.attention.output_dropout
+model.vision.transformer.layers.19.mlp
+model.vision.transformer.layers.19.mlp.activation_fn
+model.vision.transformer.layers.19.mlp.fc1
+model.vision.transformer.layers.19.mlp.fc2
+model.vision.transformer.layers.19.post_attention_layernorm
+model.vision.transformer.layers.20
+model.vision.transformer.layers.20.input_layernorm
+model.vision.transformer.layers.20.attention
+model.vision.transformer.layers.20.attention.query_key_value
+model.vision.transformer.layers.20.attention.dense
+model.vision.transformer.layers.20.attention.output_dropout
+model.vision.transformer.layers.20.mlp
+model.vision.transformer.layers.20.mlp.activation_fn
+model.vision.transformer.layers.20.mlp.fc1
+model.vision.transformer.layers.20.mlp.fc2
+model.vision.transformer.layers.20.post_attention_layernorm
+model.vision.transformer.layers.21
+model.vision.transformer.layers.21.input_layernorm
+model.vision.transformer.layers.21.attention
+model.vision.transformer.layers.21.attention.query_key_value
+model.vision.transformer.layers.21.attention.dense
+model.vision.transformer.layers.21.attention.output_dropout
+model.vision.transformer.layers.21.mlp
+model.vision.transformer.layers.21.mlp.activation_fn
+model.vision.transformer.layers.21.mlp.fc1
+model.vision.transformer.layers.21.mlp.fc2
+model.vision.transformer.layers.21.post_attention_layernorm
+model.vision.transformer.layers.22
+model.vision.transformer.layers.22.input_layernorm
+model.vision.transformer.layers.22.attention
+model.vision.transformer.layers.22.attention.query_key_value
+model.vision.transformer.layers.22.attention.dense
+model.vision.transformer.layers.22.attention.output_dropout
+model.vision.transformer.layers.22.mlp
+model.vision.transformer.layers.22.mlp.activation_fn
+model.vision.transformer.layers.22.mlp.fc1
+model.vision.transformer.layers.22.mlp.fc2
+model.vision.transformer.layers.22.post_attention_layernorm
+model.vision.transformer.layers.23
+model.vision.transformer.layers.23.input_layernorm
+model.vision.transformer.layers.23.attention
+model.vision.transformer.layers.23.attention.query_key_value
+model.vision.transformer.layers.23.attention.dense
+model.vision.transformer.layers.23.attention.output_dropout
+model.vision.transformer.layers.23.mlp
+model.vision.transformer.layers.23.mlp.activation_fn
+model.vision.transformer.layers.23.mlp.fc1
+model.vision.transformer.layers.23.mlp.fc2
+model.vision.transformer.layers.23.post_attention_layernorm
+model.vision.transformer.layers.24
+model.vision.transformer.layers.24.input_layernorm
+model.vision.transformer.layers.24.attention
+model.vision.transformer.layers.24.attention.query_key_value
+model.vision.transformer.layers.24.attention.dense
+model.vision.transformer.layers.24.attention.output_dropout
+model.vision.transformer.layers.24.mlp
+model.vision.transformer.layers.24.mlp.activation_fn
+model.vision.transformer.layers.24.mlp.fc1
+model.vision.transformer.layers.24.mlp.fc2
+model.vision.transformer.layers.24.post_attention_layernorm
+model.vision.transformer.layers.25
+model.vision.transformer.layers.25.input_layernorm
+model.vision.transformer.layers.25.attention
+model.vision.transformer.layers.25.attention.query_key_value
+model.vision.transformer.layers.25.attention.dense
+model.vision.transformer.layers.25.attention.output_dropout
+model.vision.transformer.layers.25.mlp
+model.vision.transformer.layers.25.mlp.activation_fn
+model.vision.transformer.layers.25.mlp.fc1
+model.vision.transformer.layers.25.mlp.fc2
+model.vision.transformer.layers.25.post_attention_layernorm
+model.vision.transformer.layers.26
+model.vision.transformer.layers.26.input_layernorm
+model.vision.transformer.layers.26.attention
+model.vision.transformer.layers.26.attention.query_key_value
+model.vision.transformer.layers.26.attention.dense
+model.vision.transformer.layers.26.attention.output_dropout
+model.vision.transformer.layers.26.mlp
+model.vision.transformer.layers.26.mlp.activation_fn
+model.vision.transformer.layers.26.mlp.fc1
+model.vision.transformer.layers.26.mlp.fc2
+model.vision.transformer.layers.26.post_attention_layernorm
+model.vision.transformer.layers.27
+model.vision.transformer.layers.27.input_layernorm
+model.vision.transformer.layers.27.attention
+model.vision.transformer.layers.27.attention.query_key_value
+model.vision.transformer.layers.27.attention.dense
+model.vision.transformer.layers.27.attention.output_dropout
+model.vision.transformer.layers.27.mlp
+model.vision.transformer.layers.27.mlp.activation_fn
+model.vision.transformer.layers.27.mlp.fc1
+model.vision.transformer.layers.27.mlp.fc2
+model.vision.transformer.layers.27.post_attention_layernorm
+model.vision.transformer.layers.28
+model.vision.transformer.layers.28.input_layernorm
+model.vision.transformer.layers.28.attention
+model.vision.transformer.layers.28.attention.query_key_value
+model.vision.transformer.layers.28.attention.dense
+model.vision.transformer.layers.28.attention.output_dropout
+model.vision.transformer.layers.28.mlp
+model.vision.transformer.layers.28.mlp.activation_fn
+model.vision.transformer.layers.28.mlp.fc1
+model.vision.transformer.layers.28.mlp.fc2
+model.vision.transformer.layers.28.post_attention_layernorm
+model.vision.transformer.layers.29
+model.vision.transformer.layers.29.input_layernorm
+model.vision.transformer.layers.29.attention
+model.vision.transformer.layers.29.attention.query_key_value
+model.vision.transformer.layers.29.attention.dense
+model.vision.transformer.layers.29.attention.output_dropout
+model.vision.transformer.layers.29.mlp
+model.vision.transformer.layers.29.mlp.activation_fn
+model.vision.transformer.layers.29.mlp.fc1
+model.vision.transformer.layers.29.mlp.fc2
+model.vision.transformer.layers.29.post_attention_layernorm
+model.vision.transformer.layers.30
+model.vision.transformer.layers.30.input_layernorm
+model.vision.transformer.layers.30.attention
+model.vision.transformer.layers.30.attention.query_key_value
+model.vision.transformer.layers.30.attention.dense
+model.vision.transformer.layers.30.attention.output_dropout
+model.vision.transformer.layers.30.mlp
+model.vision.transformer.layers.30.mlp.activation_fn
+model.vision.transformer.layers.30.mlp.fc1
+model.vision.transformer.layers.30.mlp.fc2
+model.vision.transformer.layers.30.post_attention_layernorm
+model.vision.transformer.layers.31
+model.vision.transformer.layers.31.input_layernorm
+model.vision.transformer.layers.31.attention
+model.vision.transformer.layers.31.attention.query_key_value
+model.vision.transformer.layers.31.attention.dense
+model.vision.transformer.layers.31.attention.output_dropout
+model.vision.transformer.layers.31.mlp
+model.vision.transformer.layers.31.mlp.activation_fn
+model.vision.transformer.layers.31.mlp.fc1
+model.vision.transformer.layers.31.mlp.fc2
+model.vision.transformer.layers.31.post_attention_layernorm
+model.vision.transformer.layers.32
+model.vision.transformer.layers.32.input_layernorm
+model.vision.transformer.layers.32.attention
+model.vision.transformer.layers.32.attention.query_key_value
+model.vision.transformer.layers.32.attention.dense
+model.vision.transformer.layers.32.attention.output_dropout
+model.vision.transformer.layers.32.mlp
+model.vision.transformer.layers.32.mlp.activation_fn
+model.vision.transformer.layers.32.mlp.fc1
+model.vision.transformer.layers.32.mlp.fc2
+model.vision.transformer.layers.32.post_attention_layernorm
+model.vision.transformer.layers.33
+model.vision.transformer.layers.33.input_layernorm
+model.vision.transformer.layers.33.attention
+model.vision.transformer.layers.33.attention.query_key_value
+model.vision.transformer.layers.33.attention.dense
+model.vision.transformer.layers.33.attention.output_dropout
+model.vision.transformer.layers.33.mlp
+model.vision.transformer.layers.33.mlp.activation_fn
+model.vision.transformer.layers.33.mlp.fc1
+model.vision.transformer.layers.33.mlp.fc2
+model.vision.transformer.layers.33.post_attention_layernorm
+model.vision.transformer.layers.34
+model.vision.transformer.layers.34.input_layernorm
+model.vision.transformer.layers.34.attention
+model.vision.transformer.layers.34.attention.query_key_value
+model.vision.transformer.layers.34.attention.dense
+model.vision.transformer.layers.34.attention.output_dropout
+model.vision.transformer.layers.34.mlp
+model.vision.transformer.layers.34.mlp.activation_fn
+model.vision.transformer.layers.34.mlp.fc1
+model.vision.transformer.layers.34.mlp.fc2
+model.vision.transformer.layers.34.post_attention_layernorm
+model.vision.transformer.layers.35
+model.vision.transformer.layers.35.input_layernorm
+model.vision.transformer.layers.35.attention
+model.vision.transformer.layers.35.attention.query_key_value
+model.vision.transformer.layers.35.attention.dense
+model.vision.transformer.layers.35.attention.output_dropout
+model.vision.transformer.layers.35.mlp
+model.vision.transformer.layers.35.mlp.activation_fn
+model.vision.transformer.layers.35.mlp.fc1
+model.vision.transformer.layers.35.mlp.fc2
+model.vision.transformer.layers.35.post_attention_layernorm
+model.vision.transformer.layers.36
+model.vision.transformer.layers.36.input_layernorm
+model.vision.transformer.layers.36.attention
+model.vision.transformer.layers.36.attention.query_key_value
+model.vision.transformer.layers.36.attention.dense
+model.vision.transformer.layers.36.attention.output_dropout
+model.vision.transformer.layers.36.mlp
+model.vision.transformer.layers.36.mlp.activation_fn
+model.vision.transformer.layers.36.mlp.fc1
+model.vision.transformer.layers.36.mlp.fc2
+model.vision.transformer.layers.36.post_attention_layernorm
+model.vision.transformer.layers.37
+model.vision.transformer.layers.37.input_layernorm
+model.vision.transformer.layers.37.attention
+model.vision.transformer.layers.37.attention.query_key_value
+model.vision.transformer.layers.37.attention.dense
+model.vision.transformer.layers.37.attention.output_dropout
+model.vision.transformer.layers.37.mlp
+model.vision.transformer.layers.37.mlp.activation_fn
+model.vision.transformer.layers.37.mlp.fc1
+model.vision.transformer.layers.37.mlp.fc2
+model.vision.transformer.layers.37.post_attention_layernorm
+model.vision.transformer.layers.38
+model.vision.transformer.layers.38.input_layernorm
+model.vision.transformer.layers.38.attention
+model.vision.transformer.layers.38.attention.query_key_value
+model.vision.transformer.layers.38.attention.dense
+model.vision.transformer.layers.38.attention.output_dropout
+model.vision.transformer.layers.38.mlp
+model.vision.transformer.layers.38.mlp.activation_fn
+model.vision.transformer.layers.38.mlp.fc1
+model.vision.transformer.layers.38.mlp.fc2
+model.vision.transformer.layers.38.post_attention_layernorm
+model.vision.transformer.layers.39
+model.vision.transformer.layers.39.input_layernorm
+model.vision.transformer.layers.39.attention
+model.vision.transformer.layers.39.attention.query_key_value
+model.vision.transformer.layers.39.attention.dense
+model.vision.transformer.layers.39.attention.output_dropout
+model.vision.transformer.layers.39.mlp
+model.vision.transformer.layers.39.mlp.activation_fn
+model.vision.transformer.layers.39.mlp.fc1
+model.vision.transformer.layers.39.mlp.fc2
+model.vision.transformer.layers.39.post_attention_layernorm
+model.vision.transformer.layers.40
+model.vision.transformer.layers.40.input_layernorm
+model.vision.transformer.layers.40.attention
+model.vision.transformer.layers.40.attention.query_key_value
+model.vision.transformer.layers.40.attention.dense
+model.vision.transformer.layers.40.attention.output_dropout
+model.vision.transformer.layers.40.mlp
+model.vision.transformer.layers.40.mlp.activation_fn
+model.vision.transformer.layers.40.mlp.fc1
+model.vision.transformer.layers.40.mlp.fc2
+model.vision.transformer.layers.40.post_attention_layernorm
+model.vision.transformer.layers.41
+model.vision.transformer.layers.41.input_layernorm
+model.vision.transformer.layers.41.attention
+model.vision.transformer.layers.41.attention.query_key_value
+model.vision.transformer.layers.41.attention.dense
+model.vision.transformer.layers.41.attention.output_dropout
+model.vision.transformer.layers.41.mlp
+model.vision.transformer.layers.41.mlp.activation_fn
+model.vision.transformer.layers.41.mlp.fc1
+model.vision.transformer.layers.41.mlp.fc2
+model.vision.transformer.layers.41.post_attention_layernorm
+model.vision.transformer.layers.42
+model.vision.transformer.layers.42.input_layernorm
+model.vision.transformer.layers.42.attention
+model.vision.transformer.layers.42.attention.query_key_value
+model.vision.transformer.layers.42.attention.dense
+model.vision.transformer.layers.42.attention.output_dropout
+model.vision.transformer.layers.42.mlp
+model.vision.transformer.layers.42.mlp.activation_fn
+model.vision.transformer.layers.42.mlp.fc1
+model.vision.transformer.layers.42.mlp.fc2
+model.vision.transformer.layers.42.post_attention_layernorm
+model.vision.transformer.layers.43
+model.vision.transformer.layers.43.input_layernorm
+model.vision.transformer.layers.43.attention
+model.vision.transformer.layers.43.attention.query_key_value
+model.vision.transformer.layers.43.attention.dense
+model.vision.transformer.layers.43.attention.output_dropout
+model.vision.transformer.layers.43.mlp
+model.vision.transformer.layers.43.mlp.activation_fn
+model.vision.transformer.layers.43.mlp.fc1
+model.vision.transformer.layers.43.mlp.fc2
+model.vision.transformer.layers.43.post_attention_layernorm
+model.vision.transformer.layers.44
+model.vision.transformer.layers.44.input_layernorm
+model.vision.transformer.layers.44.attention
+model.vision.transformer.layers.44.attention.query_key_value
+model.vision.transformer.layers.44.attention.dense
+model.vision.transformer.layers.44.attention.output_dropout
+model.vision.transformer.layers.44.mlp
+model.vision.transformer.layers.44.mlp.activation_fn
+model.vision.transformer.layers.44.mlp.fc1
+model.vision.transformer.layers.44.mlp.fc2
+model.vision.transformer.layers.44.post_attention_layernorm
+model.vision.transformer.layers.45
+model.vision.transformer.layers.45.input_layernorm
+model.vision.transformer.layers.45.attention
+model.vision.transformer.layers.45.attention.query_key_value
+model.vision.transformer.layers.45.attention.dense
+model.vision.transformer.layers.45.attention.output_dropout
+model.vision.transformer.layers.45.mlp
+model.vision.transformer.layers.45.mlp.activation_fn
+model.vision.transformer.layers.45.mlp.fc1
+model.vision.transformer.layers.45.mlp.fc2
+model.vision.transformer.layers.45.post_attention_layernorm
+model.vision.transformer.layers.46
+model.vision.transformer.layers.46.input_layernorm
+model.vision.transformer.layers.46.attention
+model.vision.transformer.layers.46.attention.query_key_value
+model.vision.transformer.layers.46.attention.dense
+model.vision.transformer.layers.46.attention.output_dropout
+model.vision.transformer.layers.46.mlp
+model.vision.transformer.layers.46.mlp.activation_fn
+model.vision.transformer.layers.46.mlp.fc1
+model.vision.transformer.layers.46.mlp.fc2
+model.vision.transformer.layers.46.post_attention_layernorm
+model.vision.transformer.layers.47
+model.vision.transformer.layers.47.input_layernorm
+model.vision.transformer.layers.47.attention
+model.vision.transformer.layers.47.attention.query_key_value
+model.vision.transformer.layers.47.attention.dense
+model.vision.transformer.layers.47.attention.output_dropout
+model.vision.transformer.layers.47.mlp
+model.vision.transformer.layers.47.mlp.activation_fn
+model.vision.transformer.layers.47.mlp.fc1
+model.vision.transformer.layers.47.mlp.fc2
+model.vision.transformer.layers.47.post_attention_layernorm
+model.vision.transformer.layers.48
+model.vision.transformer.layers.48.input_layernorm
+model.vision.transformer.layers.48.attention
+model.vision.transformer.layers.48.attention.query_key_value
+model.vision.transformer.layers.48.attention.dense
+model.vision.transformer.layers.48.attention.output_dropout
+model.vision.transformer.layers.48.mlp
+model.vision.transformer.layers.48.mlp.activation_fn
+model.vision.transformer.layers.48.mlp.fc1
+model.vision.transformer.layers.48.mlp.fc2
+model.vision.transformer.layers.48.post_attention_layernorm
+model.vision.transformer.layers.49
+model.vision.transformer.layers.49.input_layernorm
+model.vision.transformer.layers.49.attention
+model.vision.transformer.layers.49.attention.query_key_value
+model.vision.transformer.layers.49.attention.dense
+model.vision.transformer.layers.49.attention.output_dropout
+model.vision.transformer.layers.49.mlp
+model.vision.transformer.layers.49.mlp.activation_fn
+model.vision.transformer.layers.49.mlp.fc1
+model.vision.transformer.layers.49.mlp.fc2
+model.vision.transformer.layers.49.post_attention_layernorm
+model.vision.transformer.layers.50
+model.vision.transformer.layers.50.input_layernorm
+model.vision.transformer.layers.50.attention
+model.vision.transformer.layers.50.attention.query_key_value
+model.vision.transformer.layers.50.attention.dense
+model.vision.transformer.layers.50.attention.output_dropout
+model.vision.transformer.layers.50.mlp
+model.vision.transformer.layers.50.mlp.activation_fn
+model.vision.transformer.layers.50.mlp.fc1
+model.vision.transformer.layers.50.mlp.fc2
+model.vision.transformer.layers.50.post_attention_layernorm
+model.vision.transformer.layers.51
+model.vision.transformer.layers.51.input_layernorm
+model.vision.transformer.layers.51.attention
+model.vision.transformer.layers.51.attention.query_key_value
+model.vision.transformer.layers.51.attention.dense
+model.vision.transformer.layers.51.attention.output_dropout
+model.vision.transformer.layers.51.mlp
+model.vision.transformer.layers.51.mlp.activation_fn
+model.vision.transformer.layers.51.mlp.fc1
+model.vision.transformer.layers.51.mlp.fc2
+model.vision.transformer.layers.51.post_attention_layernorm
+model.vision.transformer.layers.52
+model.vision.transformer.layers.52.input_layernorm
+model.vision.transformer.layers.52.attention
+model.vision.transformer.layers.52.attention.query_key_value
+model.vision.transformer.layers.52.attention.dense
+model.vision.transformer.layers.52.attention.output_dropout
+model.vision.transformer.layers.52.mlp
+model.vision.transformer.layers.52.mlp.activation_fn
+model.vision.transformer.layers.52.mlp.fc1
+model.vision.transformer.layers.52.mlp.fc2
+model.vision.transformer.layers.52.post_attention_layernorm
+model.vision.transformer.layers.53
+model.vision.transformer.layers.53.input_layernorm
+model.vision.transformer.layers.53.attention
+model.vision.transformer.layers.53.attention.query_key_value
+model.vision.transformer.layers.53.attention.dense
+model.vision.transformer.layers.53.attention.output_dropout
+model.vision.transformer.layers.53.mlp
+model.vision.transformer.layers.53.mlp.activation_fn
+model.vision.transformer.layers.53.mlp.fc1
+model.vision.transformer.layers.53.mlp.fc2
+model.vision.transformer.layers.53.post_attention_layernorm
+model.vision.transformer.layers.54
+model.vision.transformer.layers.54.input_layernorm
+model.vision.transformer.layers.54.attention
+model.vision.transformer.layers.54.attention.query_key_value
+model.vision.transformer.layers.54.attention.dense
+model.vision.transformer.layers.54.attention.output_dropout
+model.vision.transformer.layers.54.mlp
+model.vision.transformer.layers.54.mlp.activation_fn
+model.vision.transformer.layers.54.mlp.fc1
+model.vision.transformer.layers.54.mlp.fc2
+model.vision.transformer.layers.54.post_attention_layernorm
+model.vision.transformer.layers.55
+model.vision.transformer.layers.55.input_layernorm
+model.vision.transformer.layers.55.attention
+model.vision.transformer.layers.55.attention.query_key_value
+model.vision.transformer.layers.55.attention.dense
+model.vision.transformer.layers.55.attention.output_dropout
+model.vision.transformer.layers.55.mlp
+model.vision.transformer.layers.55.mlp.activation_fn
+model.vision.transformer.layers.55.mlp.fc1
+model.vision.transformer.layers.55.mlp.fc2
+model.vision.transformer.layers.55.post_attention_layernorm
+model.vision.transformer.layers.56
+model.vision.transformer.layers.56.input_layernorm
+model.vision.transformer.layers.56.attention
+model.vision.transformer.layers.56.attention.query_key_value
+model.vision.transformer.layers.56.attention.dense
+model.vision.transformer.layers.56.attention.output_dropout
+model.vision.transformer.layers.56.mlp
+model.vision.transformer.layers.56.mlp.activation_fn
+model.vision.transformer.layers.56.mlp.fc1
+model.vision.transformer.layers.56.mlp.fc2
+model.vision.transformer.layers.56.post_attention_layernorm
+model.vision.transformer.layers.57
+model.vision.transformer.layers.57.input_layernorm
+model.vision.transformer.layers.57.attention
+model.vision.transformer.layers.57.attention.query_key_value
+model.vision.transformer.layers.57.attention.dense
+model.vision.transformer.layers.57.attention.output_dropout
+model.vision.transformer.layers.57.mlp
+model.vision.transformer.layers.57.mlp.activation_fn
+model.vision.transformer.layers.57.mlp.fc1
+model.vision.transformer.layers.57.mlp.fc2
+model.vision.transformer.layers.57.post_attention_layernorm
+model.vision.transformer.layers.58
+model.vision.transformer.layers.58.input_layernorm
+model.vision.transformer.layers.58.attention
+model.vision.transformer.layers.58.attention.query_key_value
+model.vision.transformer.layers.58.attention.dense
+model.vision.transformer.layers.58.attention.output_dropout
+model.vision.transformer.layers.58.mlp
+model.vision.transformer.layers.58.mlp.activation_fn
+model.vision.transformer.layers.58.mlp.fc1
+model.vision.transformer.layers.58.mlp.fc2
+model.vision.transformer.layers.58.post_attention_layernorm
+model.vision.transformer.layers.59
+model.vision.transformer.layers.59.input_layernorm
+model.vision.transformer.layers.59.attention
+model.vision.transformer.layers.59.attention.query_key_value
+model.vision.transformer.layers.59.attention.dense
+model.vision.transformer.layers.59.attention.output_dropout
+model.vision.transformer.layers.59.mlp
+model.vision.transformer.layers.59.mlp.activation_fn
+model.vision.transformer.layers.59.mlp.fc1
+model.vision.transformer.layers.59.mlp.fc2
+model.vision.transformer.layers.59.post_attention_layernorm
+model.vision.transformer.layers.60
+model.vision.transformer.layers.60.input_layernorm
+model.vision.transformer.layers.60.attention
+model.vision.transformer.layers.60.attention.query_key_value
+model.vision.transformer.layers.60.attention.dense
+model.vision.transformer.layers.60.attention.output_dropout
+model.vision.transformer.layers.60.mlp
+model.vision.transformer.layers.60.mlp.activation_fn
+model.vision.transformer.layers.60.mlp.fc1
+model.vision.transformer.layers.60.mlp.fc2
+model.vision.transformer.layers.60.post_attention_layernorm
+model.vision.transformer.layers.61
+model.vision.transformer.layers.61.input_layernorm
+model.vision.transformer.layers.61.attention
+model.vision.transformer.layers.61.attention.query_key_value
+model.vision.transformer.layers.61.attention.dense
+model.vision.transformer.layers.61.attention.output_dropout
+model.vision.transformer.layers.61.mlp
+model.vision.transformer.layers.61.mlp.activation_fn
+model.vision.transformer.layers.61.mlp.fc1
+model.vision.transformer.layers.61.mlp.fc2
+model.vision.transformer.layers.61.post_attention_layernorm
+model.vision.transformer.layers.62
+model.vision.transformer.layers.62.input_layernorm
+model.vision.transformer.layers.62.attention
+model.vision.transformer.layers.62.attention.query_key_value
+model.vision.transformer.layers.62.attention.dense
+model.vision.transformer.layers.62.attention.output_dropout
+model.vision.transformer.layers.62.mlp
+model.vision.transformer.layers.62.mlp.activation_fn
+model.vision.transformer.layers.62.mlp.fc1
+model.vision.transformer.layers.62.mlp.fc2
+model.vision.transformer.layers.62.post_attention_layernorm
+model.vision.linear_proj
+model.vision.linear_proj.linear_proj
+model.vision.linear_proj.norm1
+model.vision.linear_proj.act1
+model.vision.linear_proj.dense_h_to_4h
+model.vision.linear_proj.gate_proj
+model.vision.linear_proj.dense_4h_to_h
+lm_head
diff --git a/logs/allenai/Molmo-7B-D-0924.txt b/logs/allenai/Molmo-7B-D-0924.txt
new file mode 100644
index 0000000000000000000000000000000000000000..07cc6b05109a766361f2582b4c4a01acf59ab32a
--- /dev/null
+++ b/logs/allenai/Molmo-7B-D-0924.txt
@@ -0,0 +1,606 @@
+
+model
+model.transformer
+model.transformer.wte
+model.transformer.emb_drop
+model.transformer.ln_f
+model.transformer.blocks
+model.transformer.blocks.0
+model.transformer.blocks.0.dropout
+model.transformer.blocks.0.act
+model.transformer.blocks.0.attn_out
+model.transformer.blocks.0.ff_out
+model.transformer.blocks.0.rotary_emb
+model.transformer.blocks.0.attn_norm
+model.transformer.blocks.0.ff_norm
+model.transformer.blocks.0.att_proj
+model.transformer.blocks.0.ff_proj
+model.transformer.blocks.1
+model.transformer.blocks.1.dropout
+model.transformer.blocks.1.act
+model.transformer.blocks.1.attn_out
+model.transformer.blocks.1.ff_out
+model.transformer.blocks.1.rotary_emb
+model.transformer.blocks.1.attn_norm
+model.transformer.blocks.1.ff_norm
+model.transformer.blocks.1.att_proj
+model.transformer.blocks.1.ff_proj
+model.transformer.blocks.2
+model.transformer.blocks.2.dropout
+model.transformer.blocks.2.act
+model.transformer.blocks.2.attn_out
+model.transformer.blocks.2.ff_out
+model.transformer.blocks.2.rotary_emb
+model.transformer.blocks.2.attn_norm
+model.transformer.blocks.2.ff_norm
+model.transformer.blocks.2.att_proj
+model.transformer.blocks.2.ff_proj
+model.transformer.blocks.3
+model.transformer.blocks.3.dropout
+model.transformer.blocks.3.act
+model.transformer.blocks.3.attn_out
+model.transformer.blocks.3.ff_out
+model.transformer.blocks.3.rotary_emb
+model.transformer.blocks.3.attn_norm
+model.transformer.blocks.3.ff_norm
+model.transformer.blocks.3.att_proj
+model.transformer.blocks.3.ff_proj
+model.transformer.blocks.4
+model.transformer.blocks.4.dropout
+model.transformer.blocks.4.act
+model.transformer.blocks.4.attn_out
+model.transformer.blocks.4.ff_out
+model.transformer.blocks.4.rotary_emb
+model.transformer.blocks.4.attn_norm
+model.transformer.blocks.4.ff_norm
+model.transformer.blocks.4.att_proj
+model.transformer.blocks.4.ff_proj
+model.transformer.blocks.5
+model.transformer.blocks.5.dropout
+model.transformer.blocks.5.act
+model.transformer.blocks.5.attn_out
+model.transformer.blocks.5.ff_out
+model.transformer.blocks.5.rotary_emb
+model.transformer.blocks.5.attn_norm
+model.transformer.blocks.5.ff_norm
+model.transformer.blocks.5.att_proj
+model.transformer.blocks.5.ff_proj
+model.transformer.blocks.6
+model.transformer.blocks.6.dropout
+model.transformer.blocks.6.act
+model.transformer.blocks.6.attn_out
+model.transformer.blocks.6.ff_out
+model.transformer.blocks.6.rotary_emb
+model.transformer.blocks.6.attn_norm
+model.transformer.blocks.6.ff_norm
+model.transformer.blocks.6.att_proj
+model.transformer.blocks.6.ff_proj
+model.transformer.blocks.7
+model.transformer.blocks.7.dropout
+model.transformer.blocks.7.act
+model.transformer.blocks.7.attn_out
+model.transformer.blocks.7.ff_out
+model.transformer.blocks.7.rotary_emb
+model.transformer.blocks.7.attn_norm
+model.transformer.blocks.7.ff_norm
+model.transformer.blocks.7.att_proj
+model.transformer.blocks.7.ff_proj
+model.transformer.blocks.8
+model.transformer.blocks.8.dropout
+model.transformer.blocks.8.act
+model.transformer.blocks.8.attn_out
+model.transformer.blocks.8.ff_out
+model.transformer.blocks.8.rotary_emb
+model.transformer.blocks.8.attn_norm
+model.transformer.blocks.8.ff_norm
+model.transformer.blocks.8.att_proj
+model.transformer.blocks.8.ff_proj
+model.transformer.blocks.9
+model.transformer.blocks.9.dropout
+model.transformer.blocks.9.act
+model.transformer.blocks.9.attn_out
+model.transformer.blocks.9.ff_out
+model.transformer.blocks.9.rotary_emb
+model.transformer.blocks.9.attn_norm
+model.transformer.blocks.9.ff_norm
+model.transformer.blocks.9.att_proj
+model.transformer.blocks.9.ff_proj
+model.transformer.blocks.10
+model.transformer.blocks.10.dropout
+model.transformer.blocks.10.act
+model.transformer.blocks.10.attn_out
+model.transformer.blocks.10.ff_out
+model.transformer.blocks.10.rotary_emb
+model.transformer.blocks.10.attn_norm
+model.transformer.blocks.10.ff_norm
+model.transformer.blocks.10.att_proj
+model.transformer.blocks.10.ff_proj
+model.transformer.blocks.11
+model.transformer.blocks.11.dropout
+model.transformer.blocks.11.act
+model.transformer.blocks.11.attn_out
+model.transformer.blocks.11.ff_out
+model.transformer.blocks.11.rotary_emb
+model.transformer.blocks.11.attn_norm
+model.transformer.blocks.11.ff_norm
+model.transformer.blocks.11.att_proj
+model.transformer.blocks.11.ff_proj
+model.transformer.blocks.12
+model.transformer.blocks.12.dropout
+model.transformer.blocks.12.act
+model.transformer.blocks.12.attn_out
+model.transformer.blocks.12.ff_out
+model.transformer.blocks.12.rotary_emb
+model.transformer.blocks.12.attn_norm
+model.transformer.blocks.12.ff_norm
+model.transformer.blocks.12.att_proj
+model.transformer.blocks.12.ff_proj
+model.transformer.blocks.13
+model.transformer.blocks.13.dropout
+model.transformer.blocks.13.act
+model.transformer.blocks.13.attn_out
+model.transformer.blocks.13.ff_out
+model.transformer.blocks.13.rotary_emb
+model.transformer.blocks.13.attn_norm
+model.transformer.blocks.13.ff_norm
+model.transformer.blocks.13.att_proj
+model.transformer.blocks.13.ff_proj
+model.transformer.blocks.14
+model.transformer.blocks.14.dropout
+model.transformer.blocks.14.act
+model.transformer.blocks.14.attn_out
+model.transformer.blocks.14.ff_out
+model.transformer.blocks.14.rotary_emb
+model.transformer.blocks.14.attn_norm
+model.transformer.blocks.14.ff_norm
+model.transformer.blocks.14.att_proj
+model.transformer.blocks.14.ff_proj
+model.transformer.blocks.15
+model.transformer.blocks.15.dropout
+model.transformer.blocks.15.act
+model.transformer.blocks.15.attn_out
+model.transformer.blocks.15.ff_out
+model.transformer.blocks.15.rotary_emb
+model.transformer.blocks.15.attn_norm
+model.transformer.blocks.15.ff_norm
+model.transformer.blocks.15.att_proj
+model.transformer.blocks.15.ff_proj
+model.transformer.blocks.16
+model.transformer.blocks.16.dropout
+model.transformer.blocks.16.act
+model.transformer.blocks.16.attn_out
+model.transformer.blocks.16.ff_out
+model.transformer.blocks.16.rotary_emb
+model.transformer.blocks.16.attn_norm
+model.transformer.blocks.16.ff_norm
+model.transformer.blocks.16.att_proj
+model.transformer.blocks.16.ff_proj
+model.transformer.blocks.17
+model.transformer.blocks.17.dropout
+model.transformer.blocks.17.act
+model.transformer.blocks.17.attn_out
+model.transformer.blocks.17.ff_out
+model.transformer.blocks.17.rotary_emb
+model.transformer.blocks.17.attn_norm
+model.transformer.blocks.17.ff_norm
+model.transformer.blocks.17.att_proj
+model.transformer.blocks.17.ff_proj
+model.transformer.blocks.18
+model.transformer.blocks.18.dropout
+model.transformer.blocks.18.act
+model.transformer.blocks.18.attn_out
+model.transformer.blocks.18.ff_out
+model.transformer.blocks.18.rotary_emb
+model.transformer.blocks.18.attn_norm
+model.transformer.blocks.18.ff_norm
+model.transformer.blocks.18.att_proj
+model.transformer.blocks.18.ff_proj
+model.transformer.blocks.19
+model.transformer.blocks.19.dropout
+model.transformer.blocks.19.act
+model.transformer.blocks.19.attn_out
+model.transformer.blocks.19.ff_out
+model.transformer.blocks.19.rotary_emb
+model.transformer.blocks.19.attn_norm
+model.transformer.blocks.19.ff_norm
+model.transformer.blocks.19.att_proj
+model.transformer.blocks.19.ff_proj
+model.transformer.blocks.20
+model.transformer.blocks.20.dropout
+model.transformer.blocks.20.act
+model.transformer.blocks.20.attn_out
+model.transformer.blocks.20.ff_out
+model.transformer.blocks.20.rotary_emb
+model.transformer.blocks.20.attn_norm
+model.transformer.blocks.20.ff_norm
+model.transformer.blocks.20.att_proj
+model.transformer.blocks.20.ff_proj
+model.transformer.blocks.21
+model.transformer.blocks.21.dropout
+model.transformer.blocks.21.act
+model.transformer.blocks.21.attn_out
+model.transformer.blocks.21.ff_out
+model.transformer.blocks.21.rotary_emb
+model.transformer.blocks.21.attn_norm
+model.transformer.blocks.21.ff_norm
+model.transformer.blocks.21.att_proj
+model.transformer.blocks.21.ff_proj
+model.transformer.blocks.22
+model.transformer.blocks.22.dropout
+model.transformer.blocks.22.act
+model.transformer.blocks.22.attn_out
+model.transformer.blocks.22.ff_out
+model.transformer.blocks.22.rotary_emb
+model.transformer.blocks.22.attn_norm
+model.transformer.blocks.22.ff_norm
+model.transformer.blocks.22.att_proj
+model.transformer.blocks.22.ff_proj
+model.transformer.blocks.23
+model.transformer.blocks.23.dropout
+model.transformer.blocks.23.act
+model.transformer.blocks.23.attn_out
+model.transformer.blocks.23.ff_out
+model.transformer.blocks.23.rotary_emb
+model.transformer.blocks.23.attn_norm
+model.transformer.blocks.23.ff_norm
+model.transformer.blocks.23.att_proj
+model.transformer.blocks.23.ff_proj
+model.transformer.blocks.24
+model.transformer.blocks.24.dropout
+model.transformer.blocks.24.act
+model.transformer.blocks.24.attn_out
+model.transformer.blocks.24.ff_out
+model.transformer.blocks.24.rotary_emb
+model.transformer.blocks.24.attn_norm
+model.transformer.blocks.24.ff_norm
+model.transformer.blocks.24.att_proj
+model.transformer.blocks.24.ff_proj
+model.transformer.blocks.25
+model.transformer.blocks.25.dropout
+model.transformer.blocks.25.act
+model.transformer.blocks.25.attn_out
+model.transformer.blocks.25.ff_out
+model.transformer.blocks.25.rotary_emb
+model.transformer.blocks.25.attn_norm
+model.transformer.blocks.25.ff_norm
+model.transformer.blocks.25.att_proj
+model.transformer.blocks.25.ff_proj
+model.transformer.blocks.26
+model.transformer.blocks.26.dropout
+model.transformer.blocks.26.act
+model.transformer.blocks.26.attn_out
+model.transformer.blocks.26.ff_out
+model.transformer.blocks.26.rotary_emb
+model.transformer.blocks.26.attn_norm
+model.transformer.blocks.26.ff_norm
+model.transformer.blocks.26.att_proj
+model.transformer.blocks.26.ff_proj
+model.transformer.blocks.27
+model.transformer.blocks.27.dropout
+model.transformer.blocks.27.act
+model.transformer.blocks.27.attn_out
+model.transformer.blocks.27.ff_out
+model.transformer.blocks.27.rotary_emb
+model.transformer.blocks.27.attn_norm
+model.transformer.blocks.27.ff_norm
+model.transformer.blocks.27.att_proj
+model.transformer.blocks.27.ff_proj
+model.transformer.ff_out
+model.vision_backbone
+model.vision_backbone.image_vit
+model.vision_backbone.image_vit.patch_embedding
+model.vision_backbone.image_vit.pre_ln
+model.vision_backbone.image_vit.transformer
+model.vision_backbone.image_vit.transformer.resblocks
+model.vision_backbone.image_vit.transformer.resblocks.0
+model.vision_backbone.image_vit.transformer.resblocks.0.attention
+model.vision_backbone.image_vit.transformer.resblocks.0.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.0.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.0.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.0.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.0.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.0.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.0.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.1
+model.vision_backbone.image_vit.transformer.resblocks.1.attention
+model.vision_backbone.image_vit.transformer.resblocks.1.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.1.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.1.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.1.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.1.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.1.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.1.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.2
+model.vision_backbone.image_vit.transformer.resblocks.2.attention
+model.vision_backbone.image_vit.transformer.resblocks.2.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.2.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.2.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.2.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.2.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.2.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.2.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.3
+model.vision_backbone.image_vit.transformer.resblocks.3.attention
+model.vision_backbone.image_vit.transformer.resblocks.3.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.3.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.3.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.3.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.3.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.3.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.3.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.4
+model.vision_backbone.image_vit.transformer.resblocks.4.attention
+model.vision_backbone.image_vit.transformer.resblocks.4.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.4.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.4.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.4.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.4.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.4.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.4.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.5
+model.vision_backbone.image_vit.transformer.resblocks.5.attention
+model.vision_backbone.image_vit.transformer.resblocks.5.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.5.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.5.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.5.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.5.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.5.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.5.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.6
+model.vision_backbone.image_vit.transformer.resblocks.6.attention
+model.vision_backbone.image_vit.transformer.resblocks.6.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.6.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.6.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.6.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.6.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.6.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.6.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.7
+model.vision_backbone.image_vit.transformer.resblocks.7.attention
+model.vision_backbone.image_vit.transformer.resblocks.7.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.7.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.7.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.7.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.7.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.7.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.7.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.8
+model.vision_backbone.image_vit.transformer.resblocks.8.attention
+model.vision_backbone.image_vit.transformer.resblocks.8.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.8.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.8.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.8.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.8.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.8.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.8.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.9
+model.vision_backbone.image_vit.transformer.resblocks.9.attention
+model.vision_backbone.image_vit.transformer.resblocks.9.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.9.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.9.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.9.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.9.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.9.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.9.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.10
+model.vision_backbone.image_vit.transformer.resblocks.10.attention
+model.vision_backbone.image_vit.transformer.resblocks.10.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.10.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.10.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.10.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.10.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.10.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.10.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.11
+model.vision_backbone.image_vit.transformer.resblocks.11.attention
+model.vision_backbone.image_vit.transformer.resblocks.11.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.11.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.11.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.11.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.11.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.11.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.11.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.12
+model.vision_backbone.image_vit.transformer.resblocks.12.attention
+model.vision_backbone.image_vit.transformer.resblocks.12.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.12.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.12.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.12.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.12.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.12.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.12.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.13
+model.vision_backbone.image_vit.transformer.resblocks.13.attention
+model.vision_backbone.image_vit.transformer.resblocks.13.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.13.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.13.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.13.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.13.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.13.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.13.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.14
+model.vision_backbone.image_vit.transformer.resblocks.14.attention
+model.vision_backbone.image_vit.transformer.resblocks.14.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.14.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.14.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.14.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.14.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.14.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.14.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.15
+model.vision_backbone.image_vit.transformer.resblocks.15.attention
+model.vision_backbone.image_vit.transformer.resblocks.15.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.15.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.15.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.15.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.15.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.15.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.15.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.16
+model.vision_backbone.image_vit.transformer.resblocks.16.attention
+model.vision_backbone.image_vit.transformer.resblocks.16.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.16.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.16.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.16.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.16.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.16.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.16.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.17
+model.vision_backbone.image_vit.transformer.resblocks.17.attention
+model.vision_backbone.image_vit.transformer.resblocks.17.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.17.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.17.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.17.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.17.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.17.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.17.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.18
+model.vision_backbone.image_vit.transformer.resblocks.18.attention
+model.vision_backbone.image_vit.transformer.resblocks.18.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.18.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.18.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.18.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.18.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.18.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.18.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.19
+model.vision_backbone.image_vit.transformer.resblocks.19.attention
+model.vision_backbone.image_vit.transformer.resblocks.19.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.19.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.19.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.19.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.19.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.19.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.19.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.20
+model.vision_backbone.image_vit.transformer.resblocks.20.attention
+model.vision_backbone.image_vit.transformer.resblocks.20.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.20.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.20.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.20.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.20.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.20.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.20.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.21
+model.vision_backbone.image_vit.transformer.resblocks.21.attention
+model.vision_backbone.image_vit.transformer.resblocks.21.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.21.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.21.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.21.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.21.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.21.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.21.ffn_norm
+model.vision_backbone.image_vit.transformer.resblocks.22
+model.vision_backbone.image_vit.transformer.resblocks.22.attention
+model.vision_backbone.image_vit.transformer.resblocks.22.attention.wq
+model.vision_backbone.image_vit.transformer.resblocks.22.attention.wk
+model.vision_backbone.image_vit.transformer.resblocks.22.attention.wv
+model.vision_backbone.image_vit.transformer.resblocks.22.attention.wo
+model.vision_backbone.image_vit.transformer.resblocks.22.attention.residual_dropout
+model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward
+model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w1
+model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.act
+model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w2
+model.vision_backbone.image_vit.transformer.resblocks.22.attention_norm
+model.vision_backbone.image_vit.transformer.resblocks.22.ffn_norm
+model.vision_backbone.image_pooling_2d
+model.vision_backbone.image_pooling_2d.wq
+model.vision_backbone.image_pooling_2d.wk
+model.vision_backbone.image_pooling_2d.wv
+model.vision_backbone.image_pooling_2d.wo
+model.vision_backbone.image_pooling_2d.residual_dropout
+model.vision_backbone.image_projector
+model.vision_backbone.image_projector.w1
+model.vision_backbone.image_projector.w2
+model.vision_backbone.image_projector.w3
+model.vision_backbone.image_projector.act
+model.vision_backbone.image_projector.dropout
+model.vision_backbone.image_feature_dropout
diff --git a/logs/deepseek-community/Janus-Pro-1B.txt b/logs/deepseek-community/Janus-Pro-1B.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9152a96de0bd68eead350c0175f50f121fe59c80
--- /dev/null
+++ b/logs/deepseek-community/Janus-Pro-1B.txt
@@ -0,0 +1,1033 @@
+
+model
+model.vision_model
+model.vision_model.embeddings
+model.vision_model.embeddings.patch_embedding
+model.vision_model.embeddings.position_embedding
+model.vision_model.encoder
+model.vision_model.encoder.layers
+model.vision_model.encoder.layers.0
+model.vision_model.encoder.layers.0.layer_norm1
+model.vision_model.encoder.layers.0.self_attn
+model.vision_model.encoder.layers.0.self_attn.q_proj
+model.vision_model.encoder.layers.0.self_attn.k_proj
+model.vision_model.encoder.layers.0.self_attn.v_proj
+model.vision_model.encoder.layers.0.self_attn.projection_layer
+model.vision_model.encoder.layers.0.self_attn.projection_dropout
+model.vision_model.encoder.layers.0.self_attn.q_norm
+model.vision_model.encoder.layers.0.self_attn.k_norm
+model.vision_model.encoder.layers.0.layer_norm2
+model.vision_model.encoder.layers.0.mlp
+model.vision_model.encoder.layers.0.mlp.activation_fn
+model.vision_model.encoder.layers.0.mlp.fc1
+model.vision_model.encoder.layers.0.mlp.fc2
+model.vision_model.encoder.layers.0.mlp.dropout1
+model.vision_model.encoder.layers.0.mlp.dropout2
+model.vision_model.encoder.layers.1
+model.vision_model.encoder.layers.1.layer_norm1
+model.vision_model.encoder.layers.1.self_attn
+model.vision_model.encoder.layers.1.self_attn.q_proj
+model.vision_model.encoder.layers.1.self_attn.k_proj
+model.vision_model.encoder.layers.1.self_attn.v_proj
+model.vision_model.encoder.layers.1.self_attn.projection_layer
+model.vision_model.encoder.layers.1.self_attn.projection_dropout
+model.vision_model.encoder.layers.1.self_attn.q_norm
+model.vision_model.encoder.layers.1.self_attn.k_norm
+model.vision_model.encoder.layers.1.layer_norm2
+model.vision_model.encoder.layers.1.mlp
+model.vision_model.encoder.layers.1.mlp.activation_fn
+model.vision_model.encoder.layers.1.mlp.fc1
+model.vision_model.encoder.layers.1.mlp.fc2
+model.vision_model.encoder.layers.1.mlp.dropout1
+model.vision_model.encoder.layers.1.mlp.dropout2
+model.vision_model.encoder.layers.2
+model.vision_model.encoder.layers.2.layer_norm1
+model.vision_model.encoder.layers.2.self_attn
+model.vision_model.encoder.layers.2.self_attn.q_proj
+model.vision_model.encoder.layers.2.self_attn.k_proj
+model.vision_model.encoder.layers.2.self_attn.v_proj
+model.vision_model.encoder.layers.2.self_attn.projection_layer
+model.vision_model.encoder.layers.2.self_attn.projection_dropout
+model.vision_model.encoder.layers.2.self_attn.q_norm
+model.vision_model.encoder.layers.2.self_attn.k_norm
+model.vision_model.encoder.layers.2.layer_norm2
+model.vision_model.encoder.layers.2.mlp
+model.vision_model.encoder.layers.2.mlp.activation_fn
+model.vision_model.encoder.layers.2.mlp.fc1
+model.vision_model.encoder.layers.2.mlp.fc2
+model.vision_model.encoder.layers.2.mlp.dropout1
+model.vision_model.encoder.layers.2.mlp.dropout2
+model.vision_model.encoder.layers.3
+model.vision_model.encoder.layers.3.layer_norm1
+model.vision_model.encoder.layers.3.self_attn
+model.vision_model.encoder.layers.3.self_attn.q_proj
+model.vision_model.encoder.layers.3.self_attn.k_proj
+model.vision_model.encoder.layers.3.self_attn.v_proj
+model.vision_model.encoder.layers.3.self_attn.projection_layer
+model.vision_model.encoder.layers.3.self_attn.projection_dropout
+model.vision_model.encoder.layers.3.self_attn.q_norm
+model.vision_model.encoder.layers.3.self_attn.k_norm
+model.vision_model.encoder.layers.3.layer_norm2
+model.vision_model.encoder.layers.3.mlp
+model.vision_model.encoder.layers.3.mlp.activation_fn
+model.vision_model.encoder.layers.3.mlp.fc1
+model.vision_model.encoder.layers.3.mlp.fc2
+model.vision_model.encoder.layers.3.mlp.dropout1
+model.vision_model.encoder.layers.3.mlp.dropout2
+model.vision_model.encoder.layers.4
+model.vision_model.encoder.layers.4.layer_norm1
+model.vision_model.encoder.layers.4.self_attn
+model.vision_model.encoder.layers.4.self_attn.q_proj
+model.vision_model.encoder.layers.4.self_attn.k_proj
+model.vision_model.encoder.layers.4.self_attn.v_proj
+model.vision_model.encoder.layers.4.self_attn.projection_layer
+model.vision_model.encoder.layers.4.self_attn.projection_dropout
+model.vision_model.encoder.layers.4.self_attn.q_norm
+model.vision_model.encoder.layers.4.self_attn.k_norm
+model.vision_model.encoder.layers.4.layer_norm2
+model.vision_model.encoder.layers.4.mlp
+model.vision_model.encoder.layers.4.mlp.activation_fn
+model.vision_model.encoder.layers.4.mlp.fc1
+model.vision_model.encoder.layers.4.mlp.fc2
+model.vision_model.encoder.layers.4.mlp.dropout1
+model.vision_model.encoder.layers.4.mlp.dropout2
+model.vision_model.encoder.layers.5
+model.vision_model.encoder.layers.5.layer_norm1
+model.vision_model.encoder.layers.5.self_attn
+model.vision_model.encoder.layers.5.self_attn.q_proj
+model.vision_model.encoder.layers.5.self_attn.k_proj
+model.vision_model.encoder.layers.5.self_attn.v_proj
+model.vision_model.encoder.layers.5.self_attn.projection_layer
+model.vision_model.encoder.layers.5.self_attn.projection_dropout
+model.vision_model.encoder.layers.5.self_attn.q_norm
+model.vision_model.encoder.layers.5.self_attn.k_norm
+model.vision_model.encoder.layers.5.layer_norm2
+model.vision_model.encoder.layers.5.mlp
+model.vision_model.encoder.layers.5.mlp.activation_fn
+model.vision_model.encoder.layers.5.mlp.fc1
+model.vision_model.encoder.layers.5.mlp.fc2
+model.vision_model.encoder.layers.5.mlp.dropout1
+model.vision_model.encoder.layers.5.mlp.dropout2
+model.vision_model.encoder.layers.6
+model.vision_model.encoder.layers.6.layer_norm1
+model.vision_model.encoder.layers.6.self_attn
+model.vision_model.encoder.layers.6.self_attn.q_proj
+model.vision_model.encoder.layers.6.self_attn.k_proj
+model.vision_model.encoder.layers.6.self_attn.v_proj
+model.vision_model.encoder.layers.6.self_attn.projection_layer
+model.vision_model.encoder.layers.6.self_attn.projection_dropout
+model.vision_model.encoder.layers.6.self_attn.q_norm
+model.vision_model.encoder.layers.6.self_attn.k_norm
+model.vision_model.encoder.layers.6.layer_norm2
+model.vision_model.encoder.layers.6.mlp
+model.vision_model.encoder.layers.6.mlp.activation_fn
+model.vision_model.encoder.layers.6.mlp.fc1
+model.vision_model.encoder.layers.6.mlp.fc2
+model.vision_model.encoder.layers.6.mlp.dropout1
+model.vision_model.encoder.layers.6.mlp.dropout2
+model.vision_model.encoder.layers.7
+model.vision_model.encoder.layers.7.layer_norm1
+model.vision_model.encoder.layers.7.self_attn
+model.vision_model.encoder.layers.7.self_attn.q_proj
+model.vision_model.encoder.layers.7.self_attn.k_proj
+model.vision_model.encoder.layers.7.self_attn.v_proj
+model.vision_model.encoder.layers.7.self_attn.projection_layer
+model.vision_model.encoder.layers.7.self_attn.projection_dropout
+model.vision_model.encoder.layers.7.self_attn.q_norm
+model.vision_model.encoder.layers.7.self_attn.k_norm
+model.vision_model.encoder.layers.7.layer_norm2
+model.vision_model.encoder.layers.7.mlp
+model.vision_model.encoder.layers.7.mlp.activation_fn
+model.vision_model.encoder.layers.7.mlp.fc1
+model.vision_model.encoder.layers.7.mlp.fc2
+model.vision_model.encoder.layers.7.mlp.dropout1
+model.vision_model.encoder.layers.7.mlp.dropout2
+model.vision_model.encoder.layers.8
+model.vision_model.encoder.layers.8.layer_norm1
+model.vision_model.encoder.layers.8.self_attn
+model.vision_model.encoder.layers.8.self_attn.q_proj
+model.vision_model.encoder.layers.8.self_attn.k_proj
+model.vision_model.encoder.layers.8.self_attn.v_proj
+model.vision_model.encoder.layers.8.self_attn.projection_layer
+model.vision_model.encoder.layers.8.self_attn.projection_dropout
+model.vision_model.encoder.layers.8.self_attn.q_norm
+model.vision_model.encoder.layers.8.self_attn.k_norm
+model.vision_model.encoder.layers.8.layer_norm2
+model.vision_model.encoder.layers.8.mlp
+model.vision_model.encoder.layers.8.mlp.activation_fn
+model.vision_model.encoder.layers.8.mlp.fc1
+model.vision_model.encoder.layers.8.mlp.fc2
+model.vision_model.encoder.layers.8.mlp.dropout1
+model.vision_model.encoder.layers.8.mlp.dropout2
+model.vision_model.encoder.layers.9
+model.vision_model.encoder.layers.9.layer_norm1
+model.vision_model.encoder.layers.9.self_attn
+model.vision_model.encoder.layers.9.self_attn.q_proj
+model.vision_model.encoder.layers.9.self_attn.k_proj
+model.vision_model.encoder.layers.9.self_attn.v_proj
+model.vision_model.encoder.layers.9.self_attn.projection_layer
+model.vision_model.encoder.layers.9.self_attn.projection_dropout
+model.vision_model.encoder.layers.9.self_attn.q_norm
+model.vision_model.encoder.layers.9.self_attn.k_norm
+model.vision_model.encoder.layers.9.layer_norm2
+model.vision_model.encoder.layers.9.mlp
+model.vision_model.encoder.layers.9.mlp.activation_fn
+model.vision_model.encoder.layers.9.mlp.fc1
+model.vision_model.encoder.layers.9.mlp.fc2
+model.vision_model.encoder.layers.9.mlp.dropout1
+model.vision_model.encoder.layers.9.mlp.dropout2
+model.vision_model.encoder.layers.10
+model.vision_model.encoder.layers.10.layer_norm1
+model.vision_model.encoder.layers.10.self_attn
+model.vision_model.encoder.layers.10.self_attn.q_proj
+model.vision_model.encoder.layers.10.self_attn.k_proj
+model.vision_model.encoder.layers.10.self_attn.v_proj
+model.vision_model.encoder.layers.10.self_attn.projection_layer
+model.vision_model.encoder.layers.10.self_attn.projection_dropout
+model.vision_model.encoder.layers.10.self_attn.q_norm
+model.vision_model.encoder.layers.10.self_attn.k_norm
+model.vision_model.encoder.layers.10.layer_norm2
+model.vision_model.encoder.layers.10.mlp
+model.vision_model.encoder.layers.10.mlp.activation_fn
+model.vision_model.encoder.layers.10.mlp.fc1
+model.vision_model.encoder.layers.10.mlp.fc2
+model.vision_model.encoder.layers.10.mlp.dropout1
+model.vision_model.encoder.layers.10.mlp.dropout2
+model.vision_model.encoder.layers.11
+model.vision_model.encoder.layers.11.layer_norm1
+model.vision_model.encoder.layers.11.self_attn
+model.vision_model.encoder.layers.11.self_attn.q_proj
+model.vision_model.encoder.layers.11.self_attn.k_proj
+model.vision_model.encoder.layers.11.self_attn.v_proj
+model.vision_model.encoder.layers.11.self_attn.projection_layer
+model.vision_model.encoder.layers.11.self_attn.projection_dropout
+model.vision_model.encoder.layers.11.self_attn.q_norm
+model.vision_model.encoder.layers.11.self_attn.k_norm
+model.vision_model.encoder.layers.11.layer_norm2
+model.vision_model.encoder.layers.11.mlp
+model.vision_model.encoder.layers.11.mlp.activation_fn
+model.vision_model.encoder.layers.11.mlp.fc1
+model.vision_model.encoder.layers.11.mlp.fc2
+model.vision_model.encoder.layers.11.mlp.dropout1
+model.vision_model.encoder.layers.11.mlp.dropout2
+model.vision_model.encoder.layers.12
+model.vision_model.encoder.layers.12.layer_norm1
+model.vision_model.encoder.layers.12.self_attn
+model.vision_model.encoder.layers.12.self_attn.q_proj
+model.vision_model.encoder.layers.12.self_attn.k_proj
+model.vision_model.encoder.layers.12.self_attn.v_proj
+model.vision_model.encoder.layers.12.self_attn.projection_layer
+model.vision_model.encoder.layers.12.self_attn.projection_dropout
+model.vision_model.encoder.layers.12.self_attn.q_norm
+model.vision_model.encoder.layers.12.self_attn.k_norm
+model.vision_model.encoder.layers.12.layer_norm2
+model.vision_model.encoder.layers.12.mlp
+model.vision_model.encoder.layers.12.mlp.activation_fn
+model.vision_model.encoder.layers.12.mlp.fc1
+model.vision_model.encoder.layers.12.mlp.fc2
+model.vision_model.encoder.layers.12.mlp.dropout1
+model.vision_model.encoder.layers.12.mlp.dropout2
+model.vision_model.encoder.layers.13
+model.vision_model.encoder.layers.13.layer_norm1
+model.vision_model.encoder.layers.13.self_attn
+model.vision_model.encoder.layers.13.self_attn.q_proj
+model.vision_model.encoder.layers.13.self_attn.k_proj
+model.vision_model.encoder.layers.13.self_attn.v_proj
+model.vision_model.encoder.layers.13.self_attn.projection_layer
+model.vision_model.encoder.layers.13.self_attn.projection_dropout
+model.vision_model.encoder.layers.13.self_attn.q_norm
+model.vision_model.encoder.layers.13.self_attn.k_norm
+model.vision_model.encoder.layers.13.layer_norm2
+model.vision_model.encoder.layers.13.mlp
+model.vision_model.encoder.layers.13.mlp.activation_fn
+model.vision_model.encoder.layers.13.mlp.fc1
+model.vision_model.encoder.layers.13.mlp.fc2
+model.vision_model.encoder.layers.13.mlp.dropout1
+model.vision_model.encoder.layers.13.mlp.dropout2
+model.vision_model.encoder.layers.14
+model.vision_model.encoder.layers.14.layer_norm1
+model.vision_model.encoder.layers.14.self_attn
+model.vision_model.encoder.layers.14.self_attn.q_proj
+model.vision_model.encoder.layers.14.self_attn.k_proj
+model.vision_model.encoder.layers.14.self_attn.v_proj
+model.vision_model.encoder.layers.14.self_attn.projection_layer
+model.vision_model.encoder.layers.14.self_attn.projection_dropout
+model.vision_model.encoder.layers.14.self_attn.q_norm
+model.vision_model.encoder.layers.14.self_attn.k_norm
+model.vision_model.encoder.layers.14.layer_norm2
+model.vision_model.encoder.layers.14.mlp
+model.vision_model.encoder.layers.14.mlp.activation_fn
+model.vision_model.encoder.layers.14.mlp.fc1
+model.vision_model.encoder.layers.14.mlp.fc2
+model.vision_model.encoder.layers.14.mlp.dropout1
+model.vision_model.encoder.layers.14.mlp.dropout2
+model.vision_model.encoder.layers.15
+model.vision_model.encoder.layers.15.layer_norm1
+model.vision_model.encoder.layers.15.self_attn
+model.vision_model.encoder.layers.15.self_attn.q_proj
+model.vision_model.encoder.layers.15.self_attn.k_proj
+model.vision_model.encoder.layers.15.self_attn.v_proj
+model.vision_model.encoder.layers.15.self_attn.projection_layer
+model.vision_model.encoder.layers.15.self_attn.projection_dropout
+model.vision_model.encoder.layers.15.self_attn.q_norm
+model.vision_model.encoder.layers.15.self_attn.k_norm
+model.vision_model.encoder.layers.15.layer_norm2
+model.vision_model.encoder.layers.15.mlp
+model.vision_model.encoder.layers.15.mlp.activation_fn
+model.vision_model.encoder.layers.15.mlp.fc1
+model.vision_model.encoder.layers.15.mlp.fc2
+model.vision_model.encoder.layers.15.mlp.dropout1
+model.vision_model.encoder.layers.15.mlp.dropout2
+model.vision_model.encoder.layers.16
+model.vision_model.encoder.layers.16.layer_norm1
+model.vision_model.encoder.layers.16.self_attn
+model.vision_model.encoder.layers.16.self_attn.q_proj
+model.vision_model.encoder.layers.16.self_attn.k_proj
+model.vision_model.encoder.layers.16.self_attn.v_proj
+model.vision_model.encoder.layers.16.self_attn.projection_layer
+model.vision_model.encoder.layers.16.self_attn.projection_dropout
+model.vision_model.encoder.layers.16.self_attn.q_norm
+model.vision_model.encoder.layers.16.self_attn.k_norm
+model.vision_model.encoder.layers.16.layer_norm2
+model.vision_model.encoder.layers.16.mlp
+model.vision_model.encoder.layers.16.mlp.activation_fn
+model.vision_model.encoder.layers.16.mlp.fc1
+model.vision_model.encoder.layers.16.mlp.fc2
+model.vision_model.encoder.layers.16.mlp.dropout1
+model.vision_model.encoder.layers.16.mlp.dropout2
+model.vision_model.encoder.layers.17
+model.vision_model.encoder.layers.17.layer_norm1
+model.vision_model.encoder.layers.17.self_attn
+model.vision_model.encoder.layers.17.self_attn.q_proj
+model.vision_model.encoder.layers.17.self_attn.k_proj
+model.vision_model.encoder.layers.17.self_attn.v_proj
+model.vision_model.encoder.layers.17.self_attn.projection_layer
+model.vision_model.encoder.layers.17.self_attn.projection_dropout
+model.vision_model.encoder.layers.17.self_attn.q_norm
+model.vision_model.encoder.layers.17.self_attn.k_norm
+model.vision_model.encoder.layers.17.layer_norm2
+model.vision_model.encoder.layers.17.mlp
+model.vision_model.encoder.layers.17.mlp.activation_fn
+model.vision_model.encoder.layers.17.mlp.fc1
+model.vision_model.encoder.layers.17.mlp.fc2
+model.vision_model.encoder.layers.17.mlp.dropout1
+model.vision_model.encoder.layers.17.mlp.dropout2
+model.vision_model.encoder.layers.18
+model.vision_model.encoder.layers.18.layer_norm1
+model.vision_model.encoder.layers.18.self_attn
+model.vision_model.encoder.layers.18.self_attn.q_proj
+model.vision_model.encoder.layers.18.self_attn.k_proj
+model.vision_model.encoder.layers.18.self_attn.v_proj
+model.vision_model.encoder.layers.18.self_attn.projection_layer
+model.vision_model.encoder.layers.18.self_attn.projection_dropout
+model.vision_model.encoder.layers.18.self_attn.q_norm
+model.vision_model.encoder.layers.18.self_attn.k_norm
+model.vision_model.encoder.layers.18.layer_norm2
+model.vision_model.encoder.layers.18.mlp
+model.vision_model.encoder.layers.18.mlp.activation_fn
+model.vision_model.encoder.layers.18.mlp.fc1
+model.vision_model.encoder.layers.18.mlp.fc2
+model.vision_model.encoder.layers.18.mlp.dropout1
+model.vision_model.encoder.layers.18.mlp.dropout2
+model.vision_model.encoder.layers.19
+model.vision_model.encoder.layers.19.layer_norm1
+model.vision_model.encoder.layers.19.self_attn
+model.vision_model.encoder.layers.19.self_attn.q_proj
+model.vision_model.encoder.layers.19.self_attn.k_proj
+model.vision_model.encoder.layers.19.self_attn.v_proj
+model.vision_model.encoder.layers.19.self_attn.projection_layer
+model.vision_model.encoder.layers.19.self_attn.projection_dropout
+model.vision_model.encoder.layers.19.self_attn.q_norm
+model.vision_model.encoder.layers.19.self_attn.k_norm
+model.vision_model.encoder.layers.19.layer_norm2
+model.vision_model.encoder.layers.19.mlp
+model.vision_model.encoder.layers.19.mlp.activation_fn
+model.vision_model.encoder.layers.19.mlp.fc1
+model.vision_model.encoder.layers.19.mlp.fc2
+model.vision_model.encoder.layers.19.mlp.dropout1
+model.vision_model.encoder.layers.19.mlp.dropout2
+model.vision_model.encoder.layers.20
+model.vision_model.encoder.layers.20.layer_norm1
+model.vision_model.encoder.layers.20.self_attn
+model.vision_model.encoder.layers.20.self_attn.q_proj
+model.vision_model.encoder.layers.20.self_attn.k_proj
+model.vision_model.encoder.layers.20.self_attn.v_proj
+model.vision_model.encoder.layers.20.self_attn.projection_layer
+model.vision_model.encoder.layers.20.self_attn.projection_dropout
+model.vision_model.encoder.layers.20.self_attn.q_norm
+model.vision_model.encoder.layers.20.self_attn.k_norm
+model.vision_model.encoder.layers.20.layer_norm2
+model.vision_model.encoder.layers.20.mlp
+model.vision_model.encoder.layers.20.mlp.activation_fn
+model.vision_model.encoder.layers.20.mlp.fc1
+model.vision_model.encoder.layers.20.mlp.fc2
+model.vision_model.encoder.layers.20.mlp.dropout1
+model.vision_model.encoder.layers.20.mlp.dropout2
+model.vision_model.encoder.layers.21
+model.vision_model.encoder.layers.21.layer_norm1
+model.vision_model.encoder.layers.21.self_attn
+model.vision_model.encoder.layers.21.self_attn.q_proj
+model.vision_model.encoder.layers.21.self_attn.k_proj
+model.vision_model.encoder.layers.21.self_attn.v_proj
+model.vision_model.encoder.layers.21.self_attn.projection_layer
+model.vision_model.encoder.layers.21.self_attn.projection_dropout
+model.vision_model.encoder.layers.21.self_attn.q_norm
+model.vision_model.encoder.layers.21.self_attn.k_norm
+model.vision_model.encoder.layers.21.layer_norm2
+model.vision_model.encoder.layers.21.mlp
+model.vision_model.encoder.layers.21.mlp.activation_fn
+model.vision_model.encoder.layers.21.mlp.fc1
+model.vision_model.encoder.layers.21.mlp.fc2
+model.vision_model.encoder.layers.21.mlp.dropout1
+model.vision_model.encoder.layers.21.mlp.dropout2
+model.vision_model.encoder.layers.22
+model.vision_model.encoder.layers.22.layer_norm1
+model.vision_model.encoder.layers.22.self_attn
+model.vision_model.encoder.layers.22.self_attn.q_proj
+model.vision_model.encoder.layers.22.self_attn.k_proj
+model.vision_model.encoder.layers.22.self_attn.v_proj
+model.vision_model.encoder.layers.22.self_attn.projection_layer
+model.vision_model.encoder.layers.22.self_attn.projection_dropout
+model.vision_model.encoder.layers.22.self_attn.q_norm
+model.vision_model.encoder.layers.22.self_attn.k_norm
+model.vision_model.encoder.layers.22.layer_norm2
+model.vision_model.encoder.layers.22.mlp
+model.vision_model.encoder.layers.22.mlp.activation_fn
+model.vision_model.encoder.layers.22.mlp.fc1
+model.vision_model.encoder.layers.22.mlp.fc2
+model.vision_model.encoder.layers.22.mlp.dropout1
+model.vision_model.encoder.layers.22.mlp.dropout2
+model.vision_model.encoder.layers.23
+model.vision_model.encoder.layers.23.layer_norm1
+model.vision_model.encoder.layers.23.self_attn
+model.vision_model.encoder.layers.23.self_attn.q_proj
+model.vision_model.encoder.layers.23.self_attn.k_proj
+model.vision_model.encoder.layers.23.self_attn.v_proj
+model.vision_model.encoder.layers.23.self_attn.projection_layer
+model.vision_model.encoder.layers.23.self_attn.projection_dropout
+model.vision_model.encoder.layers.23.self_attn.q_norm
+model.vision_model.encoder.layers.23.self_attn.k_norm
+model.vision_model.encoder.layers.23.layer_norm2
+model.vision_model.encoder.layers.23.mlp
+model.vision_model.encoder.layers.23.mlp.activation_fn
+model.vision_model.encoder.layers.23.mlp.fc1
+model.vision_model.encoder.layers.23.mlp.fc2
+model.vision_model.encoder.layers.23.mlp.dropout1
+model.vision_model.encoder.layers.23.mlp.dropout2
+model.vision_model.post_layernorm
+model.aligner
+model.aligner.fc1
+model.aligner.hidden_layers
+model.aligner.hidden_layers.0
+model.aligner.activation_fn
+model.vqmodel
+model.vqmodel.encoder
+model.vqmodel.encoder.conv_in
+model.vqmodel.encoder.down
+model.vqmodel.encoder.down.0
+model.vqmodel.encoder.down.0.block
+model.vqmodel.encoder.down.0.block.0
+model.vqmodel.encoder.down.0.block.0.norm1
+model.vqmodel.encoder.down.0.block.0.conv1
+model.vqmodel.encoder.down.0.block.0.norm2
+model.vqmodel.encoder.down.0.block.0.dropout
+model.vqmodel.encoder.down.0.block.0.conv2
+model.vqmodel.encoder.down.0.block.1
+model.vqmodel.encoder.down.0.block.1.norm1
+model.vqmodel.encoder.down.0.block.1.conv1
+model.vqmodel.encoder.down.0.block.1.norm2
+model.vqmodel.encoder.down.0.block.1.dropout
+model.vqmodel.encoder.down.0.block.1.conv2
+model.vqmodel.encoder.down.0.attn
+model.vqmodel.encoder.down.0.downsample
+model.vqmodel.encoder.down.0.downsample.conv
+model.vqmodel.encoder.down.1
+model.vqmodel.encoder.down.1.block
+model.vqmodel.encoder.down.1.block.0
+model.vqmodel.encoder.down.1.block.0.norm1
+model.vqmodel.encoder.down.1.block.0.conv1
+model.vqmodel.encoder.down.1.block.0.norm2
+model.vqmodel.encoder.down.1.block.0.dropout
+model.vqmodel.encoder.down.1.block.0.conv2
+model.vqmodel.encoder.down.1.block.1
+model.vqmodel.encoder.down.1.block.1.norm1
+model.vqmodel.encoder.down.1.block.1.conv1
+model.vqmodel.encoder.down.1.block.1.norm2
+model.vqmodel.encoder.down.1.block.1.dropout
+model.vqmodel.encoder.down.1.block.1.conv2
+model.vqmodel.encoder.down.1.attn
+model.vqmodel.encoder.down.1.downsample
+model.vqmodel.encoder.down.1.downsample.conv
+model.vqmodel.encoder.down.2
+model.vqmodel.encoder.down.2.block
+model.vqmodel.encoder.down.2.block.0
+model.vqmodel.encoder.down.2.block.0.norm1
+model.vqmodel.encoder.down.2.block.0.conv1
+model.vqmodel.encoder.down.2.block.0.norm2
+model.vqmodel.encoder.down.2.block.0.dropout
+model.vqmodel.encoder.down.2.block.0.conv2
+model.vqmodel.encoder.down.2.block.0.nin_shortcut
+model.vqmodel.encoder.down.2.block.1
+model.vqmodel.encoder.down.2.block.1.norm1
+model.vqmodel.encoder.down.2.block.1.conv1
+model.vqmodel.encoder.down.2.block.1.norm2
+model.vqmodel.encoder.down.2.block.1.dropout
+model.vqmodel.encoder.down.2.block.1.conv2
+model.vqmodel.encoder.down.2.attn
+model.vqmodel.encoder.down.2.downsample
+model.vqmodel.encoder.down.2.downsample.conv
+model.vqmodel.encoder.down.3
+model.vqmodel.encoder.down.3.block
+model.vqmodel.encoder.down.3.block.0
+model.vqmodel.encoder.down.3.block.0.norm1
+model.vqmodel.encoder.down.3.block.0.conv1
+model.vqmodel.encoder.down.3.block.0.norm2
+model.vqmodel.encoder.down.3.block.0.dropout
+model.vqmodel.encoder.down.3.block.0.conv2
+model.vqmodel.encoder.down.3.block.1
+model.vqmodel.encoder.down.3.block.1.norm1
+model.vqmodel.encoder.down.3.block.1.conv1
+model.vqmodel.encoder.down.3.block.1.norm2
+model.vqmodel.encoder.down.3.block.1.dropout
+model.vqmodel.encoder.down.3.block.1.conv2
+model.vqmodel.encoder.down.3.attn
+model.vqmodel.encoder.down.3.downsample
+model.vqmodel.encoder.down.3.downsample.conv
+model.vqmodel.encoder.down.4
+model.vqmodel.encoder.down.4.block
+model.vqmodel.encoder.down.4.block.0
+model.vqmodel.encoder.down.4.block.0.norm1
+model.vqmodel.encoder.down.4.block.0.conv1
+model.vqmodel.encoder.down.4.block.0.norm2
+model.vqmodel.encoder.down.4.block.0.dropout
+model.vqmodel.encoder.down.4.block.0.conv2
+model.vqmodel.encoder.down.4.block.0.nin_shortcut
+model.vqmodel.encoder.down.4.block.1
+model.vqmodel.encoder.down.4.block.1.norm1
+model.vqmodel.encoder.down.4.block.1.conv1
+model.vqmodel.encoder.down.4.block.1.norm2
+model.vqmodel.encoder.down.4.block.1.dropout
+model.vqmodel.encoder.down.4.block.1.conv2
+model.vqmodel.encoder.down.4.attn
+model.vqmodel.encoder.down.4.attn.0
+model.vqmodel.encoder.down.4.attn.0.norm
+model.vqmodel.encoder.down.4.attn.0.q
+model.vqmodel.encoder.down.4.attn.0.k
+model.vqmodel.encoder.down.4.attn.0.v
+model.vqmodel.encoder.down.4.attn.0.proj_out
+model.vqmodel.encoder.down.4.attn.1
+model.vqmodel.encoder.down.4.attn.1.norm
+model.vqmodel.encoder.down.4.attn.1.q
+model.vqmodel.encoder.down.4.attn.1.k
+model.vqmodel.encoder.down.4.attn.1.v
+model.vqmodel.encoder.down.4.attn.1.proj_out
+model.vqmodel.encoder.mid
+model.vqmodel.encoder.mid.block_1
+model.vqmodel.encoder.mid.block_1.norm1
+model.vqmodel.encoder.mid.block_1.conv1
+model.vqmodel.encoder.mid.block_1.norm2
+model.vqmodel.encoder.mid.block_1.dropout
+model.vqmodel.encoder.mid.block_1.conv2
+model.vqmodel.encoder.mid.attn_1
+model.vqmodel.encoder.mid.attn_1.norm
+model.vqmodel.encoder.mid.attn_1.q
+model.vqmodel.encoder.mid.attn_1.k
+model.vqmodel.encoder.mid.attn_1.v
+model.vqmodel.encoder.mid.attn_1.proj_out
+model.vqmodel.encoder.mid.block_2
+model.vqmodel.encoder.mid.block_2.norm1
+model.vqmodel.encoder.mid.block_2.conv1
+model.vqmodel.encoder.mid.block_2.norm2
+model.vqmodel.encoder.mid.block_2.dropout
+model.vqmodel.encoder.mid.block_2.conv2
+model.vqmodel.encoder.norm_out
+model.vqmodel.encoder.conv_out
+model.vqmodel.quantize
+model.vqmodel.quantize.embedding
+model.vqmodel.quant_conv
+model.vqmodel.post_quant_conv
+model.vqmodel.decoder
+model.vqmodel.decoder.conv_in
+model.vqmodel.decoder.mid
+model.vqmodel.decoder.mid.block_1
+model.vqmodel.decoder.mid.block_1.norm1
+model.vqmodel.decoder.mid.block_1.conv1
+model.vqmodel.decoder.mid.block_1.norm2
+model.vqmodel.decoder.mid.block_1.dropout
+model.vqmodel.decoder.mid.block_1.conv2
+model.vqmodel.decoder.mid.attn_1
+model.vqmodel.decoder.mid.attn_1.norm
+model.vqmodel.decoder.mid.attn_1.q
+model.vqmodel.decoder.mid.attn_1.k
+model.vqmodel.decoder.mid.attn_1.v
+model.vqmodel.decoder.mid.attn_1.proj_out
+model.vqmodel.decoder.mid.block_2
+model.vqmodel.decoder.mid.block_2.norm1
+model.vqmodel.decoder.mid.block_2.conv1
+model.vqmodel.decoder.mid.block_2.norm2
+model.vqmodel.decoder.mid.block_2.dropout
+model.vqmodel.decoder.mid.block_2.conv2
+model.vqmodel.decoder.up
+model.vqmodel.decoder.up.0
+model.vqmodel.decoder.up.0.block
+model.vqmodel.decoder.up.0.block.0
+model.vqmodel.decoder.up.0.block.0.norm1
+model.vqmodel.decoder.up.0.block.0.conv1
+model.vqmodel.decoder.up.0.block.0.norm2
+model.vqmodel.decoder.up.0.block.0.dropout
+model.vqmodel.decoder.up.0.block.0.conv2
+model.vqmodel.decoder.up.0.block.1
+model.vqmodel.decoder.up.0.block.1.norm1
+model.vqmodel.decoder.up.0.block.1.conv1
+model.vqmodel.decoder.up.0.block.1.norm2
+model.vqmodel.decoder.up.0.block.1.dropout
+model.vqmodel.decoder.up.0.block.1.conv2
+model.vqmodel.decoder.up.0.block.2
+model.vqmodel.decoder.up.0.block.2.norm1
+model.vqmodel.decoder.up.0.block.2.conv1
+model.vqmodel.decoder.up.0.block.2.norm2
+model.vqmodel.decoder.up.0.block.2.dropout
+model.vqmodel.decoder.up.0.block.2.conv2
+model.vqmodel.decoder.up.0.attn
+model.vqmodel.decoder.up.0.attn.0
+model.vqmodel.decoder.up.0.attn.0.norm
+model.vqmodel.decoder.up.0.attn.0.q
+model.vqmodel.decoder.up.0.attn.0.k
+model.vqmodel.decoder.up.0.attn.0.v
+model.vqmodel.decoder.up.0.attn.0.proj_out
+model.vqmodel.decoder.up.0.attn.1
+model.vqmodel.decoder.up.0.attn.1.norm
+model.vqmodel.decoder.up.0.attn.1.q
+model.vqmodel.decoder.up.0.attn.1.k
+model.vqmodel.decoder.up.0.attn.1.v
+model.vqmodel.decoder.up.0.attn.1.proj_out
+model.vqmodel.decoder.up.0.attn.2
+model.vqmodel.decoder.up.0.attn.2.norm
+model.vqmodel.decoder.up.0.attn.2.q
+model.vqmodel.decoder.up.0.attn.2.k
+model.vqmodel.decoder.up.0.attn.2.v
+model.vqmodel.decoder.up.0.attn.2.proj_out
+model.vqmodel.decoder.up.0.upsample
+model.vqmodel.decoder.up.0.upsample.conv
+model.vqmodel.decoder.up.1
+model.vqmodel.decoder.up.1.block
+model.vqmodel.decoder.up.1.block.0
+model.vqmodel.decoder.up.1.block.0.norm1
+model.vqmodel.decoder.up.1.block.0.conv1
+model.vqmodel.decoder.up.1.block.0.norm2
+model.vqmodel.decoder.up.1.block.0.dropout
+model.vqmodel.decoder.up.1.block.0.conv2
+model.vqmodel.decoder.up.1.block.0.nin_shortcut
+model.vqmodel.decoder.up.1.block.1
+model.vqmodel.decoder.up.1.block.1.norm1
+model.vqmodel.decoder.up.1.block.1.conv1
+model.vqmodel.decoder.up.1.block.1.norm2
+model.vqmodel.decoder.up.1.block.1.dropout
+model.vqmodel.decoder.up.1.block.1.conv2
+model.vqmodel.decoder.up.1.block.2
+model.vqmodel.decoder.up.1.block.2.norm1
+model.vqmodel.decoder.up.1.block.2.conv1
+model.vqmodel.decoder.up.1.block.2.norm2
+model.vqmodel.decoder.up.1.block.2.dropout
+model.vqmodel.decoder.up.1.block.2.conv2
+model.vqmodel.decoder.up.1.attn
+model.vqmodel.decoder.up.1.upsample
+model.vqmodel.decoder.up.1.upsample.conv
+model.vqmodel.decoder.up.2
+model.vqmodel.decoder.up.2.block
+model.vqmodel.decoder.up.2.block.0
+model.vqmodel.decoder.up.2.block.0.norm1
+model.vqmodel.decoder.up.2.block.0.conv1
+model.vqmodel.decoder.up.2.block.0.norm2
+model.vqmodel.decoder.up.2.block.0.dropout
+model.vqmodel.decoder.up.2.block.0.conv2
+model.vqmodel.decoder.up.2.block.1
+model.vqmodel.decoder.up.2.block.1.norm1
+model.vqmodel.decoder.up.2.block.1.conv1
+model.vqmodel.decoder.up.2.block.1.norm2
+model.vqmodel.decoder.up.2.block.1.dropout
+model.vqmodel.decoder.up.2.block.1.conv2
+model.vqmodel.decoder.up.2.block.2
+model.vqmodel.decoder.up.2.block.2.norm1
+model.vqmodel.decoder.up.2.block.2.conv1
+model.vqmodel.decoder.up.2.block.2.norm2
+model.vqmodel.decoder.up.2.block.2.dropout
+model.vqmodel.decoder.up.2.block.2.conv2
+model.vqmodel.decoder.up.2.attn
+model.vqmodel.decoder.up.2.upsample
+model.vqmodel.decoder.up.2.upsample.conv
+model.vqmodel.decoder.up.3
+model.vqmodel.decoder.up.3.block
+model.vqmodel.decoder.up.3.block.0
+model.vqmodel.decoder.up.3.block.0.norm1
+model.vqmodel.decoder.up.3.block.0.conv1
+model.vqmodel.decoder.up.3.block.0.norm2
+model.vqmodel.decoder.up.3.block.0.dropout
+model.vqmodel.decoder.up.3.block.0.conv2
+model.vqmodel.decoder.up.3.block.0.nin_shortcut
+model.vqmodel.decoder.up.3.block.1
+model.vqmodel.decoder.up.3.block.1.norm1
+model.vqmodel.decoder.up.3.block.1.conv1
+model.vqmodel.decoder.up.3.block.1.norm2
+model.vqmodel.decoder.up.3.block.1.dropout
+model.vqmodel.decoder.up.3.block.1.conv2
+model.vqmodel.decoder.up.3.block.2
+model.vqmodel.decoder.up.3.block.2.norm1
+model.vqmodel.decoder.up.3.block.2.conv1
+model.vqmodel.decoder.up.3.block.2.norm2
+model.vqmodel.decoder.up.3.block.2.dropout
+model.vqmodel.decoder.up.3.block.2.conv2
+model.vqmodel.decoder.up.3.attn
+model.vqmodel.decoder.up.3.upsample
+model.vqmodel.decoder.up.3.upsample.conv
+model.vqmodel.decoder.up.4
+model.vqmodel.decoder.up.4.block
+model.vqmodel.decoder.up.4.block.0
+model.vqmodel.decoder.up.4.block.0.norm1
+model.vqmodel.decoder.up.4.block.0.conv1
+model.vqmodel.decoder.up.4.block.0.norm2
+model.vqmodel.decoder.up.4.block.0.dropout
+model.vqmodel.decoder.up.4.block.0.conv2
+model.vqmodel.decoder.up.4.block.1
+model.vqmodel.decoder.up.4.block.1.norm1
+model.vqmodel.decoder.up.4.block.1.conv1
+model.vqmodel.decoder.up.4.block.1.norm2
+model.vqmodel.decoder.up.4.block.1.dropout
+model.vqmodel.decoder.up.4.block.1.conv2
+model.vqmodel.decoder.up.4.block.2
+model.vqmodel.decoder.up.4.block.2.norm1
+model.vqmodel.decoder.up.4.block.2.conv1
+model.vqmodel.decoder.up.4.block.2.norm2
+model.vqmodel.decoder.up.4.block.2.dropout
+model.vqmodel.decoder.up.4.block.2.conv2
+model.vqmodel.decoder.up.4.attn
+model.vqmodel.decoder.norm_out
+model.vqmodel.decoder.conv_out
+model.generation_embeddings
+model.generation_aligner
+model.generation_aligner.fc1
+model.generation_aligner.hidden_layers
+model.generation_aligner.hidden_layers.0
+model.generation_aligner.activation_fn
+model.generation_head
+model.generation_head.proj_out
+model.generation_head.activation_fn
+model.generation_head.vision_head
+model.language_model
+model.language_model.embed_tokens
+model.language_model.layers
+model.language_model.layers.0
+model.language_model.layers.0.self_attn
+model.language_model.layers.0.self_attn.q_proj
+model.language_model.layers.0.self_attn.k_proj
+model.language_model.layers.0.self_attn.v_proj
+model.language_model.layers.0.self_attn.o_proj
+model.language_model.layers.0.mlp
+model.language_model.layers.0.mlp.gate_proj
+model.language_model.layers.0.mlp.up_proj
+model.language_model.layers.0.mlp.down_proj
+model.language_model.layers.0.mlp.act_fn
+model.language_model.layers.0.input_layernorm
+model.language_model.layers.0.post_attention_layernorm
+model.language_model.layers.1
+model.language_model.layers.1.self_attn
+model.language_model.layers.1.self_attn.q_proj
+model.language_model.layers.1.self_attn.k_proj
+model.language_model.layers.1.self_attn.v_proj
+model.language_model.layers.1.self_attn.o_proj
+model.language_model.layers.1.mlp
+model.language_model.layers.1.mlp.gate_proj
+model.language_model.layers.1.mlp.up_proj
+model.language_model.layers.1.mlp.down_proj
+model.language_model.layers.1.mlp.act_fn
+model.language_model.layers.1.input_layernorm
+model.language_model.layers.1.post_attention_layernorm
+model.language_model.layers.2
+model.language_model.layers.2.self_attn
+model.language_model.layers.2.self_attn.q_proj
+model.language_model.layers.2.self_attn.k_proj
+model.language_model.layers.2.self_attn.v_proj
+model.language_model.layers.2.self_attn.o_proj
+model.language_model.layers.2.mlp
+model.language_model.layers.2.mlp.gate_proj
+model.language_model.layers.2.mlp.up_proj
+model.language_model.layers.2.mlp.down_proj
+model.language_model.layers.2.mlp.act_fn
+model.language_model.layers.2.input_layernorm
+model.language_model.layers.2.post_attention_layernorm
+model.language_model.layers.3
+model.language_model.layers.3.self_attn
+model.language_model.layers.3.self_attn.q_proj
+model.language_model.layers.3.self_attn.k_proj
+model.language_model.layers.3.self_attn.v_proj
+model.language_model.layers.3.self_attn.o_proj
+model.language_model.layers.3.mlp
+model.language_model.layers.3.mlp.gate_proj
+model.language_model.layers.3.mlp.up_proj
+model.language_model.layers.3.mlp.down_proj
+model.language_model.layers.3.mlp.act_fn
+model.language_model.layers.3.input_layernorm
+model.language_model.layers.3.post_attention_layernorm
+model.language_model.layers.4
+model.language_model.layers.4.self_attn
+model.language_model.layers.4.self_attn.q_proj
+model.language_model.layers.4.self_attn.k_proj
+model.language_model.layers.4.self_attn.v_proj
+model.language_model.layers.4.self_attn.o_proj
+model.language_model.layers.4.mlp
+model.language_model.layers.4.mlp.gate_proj
+model.language_model.layers.4.mlp.up_proj
+model.language_model.layers.4.mlp.down_proj
+model.language_model.layers.4.mlp.act_fn
+model.language_model.layers.4.input_layernorm
+model.language_model.layers.4.post_attention_layernorm
+model.language_model.layers.5
+model.language_model.layers.5.self_attn
+model.language_model.layers.5.self_attn.q_proj
+model.language_model.layers.5.self_attn.k_proj
+model.language_model.layers.5.self_attn.v_proj
+model.language_model.layers.5.self_attn.o_proj
+model.language_model.layers.5.mlp
+model.language_model.layers.5.mlp.gate_proj
+model.language_model.layers.5.mlp.up_proj
+model.language_model.layers.5.mlp.down_proj
+model.language_model.layers.5.mlp.act_fn
+model.language_model.layers.5.input_layernorm
+model.language_model.layers.5.post_attention_layernorm
+model.language_model.layers.6
+model.language_model.layers.6.self_attn
+model.language_model.layers.6.self_attn.q_proj
+model.language_model.layers.6.self_attn.k_proj
+model.language_model.layers.6.self_attn.v_proj
+model.language_model.layers.6.self_attn.o_proj
+model.language_model.layers.6.mlp
+model.language_model.layers.6.mlp.gate_proj
+model.language_model.layers.6.mlp.up_proj
+model.language_model.layers.6.mlp.down_proj
+model.language_model.layers.6.mlp.act_fn
+model.language_model.layers.6.input_layernorm
+model.language_model.layers.6.post_attention_layernorm
+model.language_model.layers.7
+model.language_model.layers.7.self_attn
+model.language_model.layers.7.self_attn.q_proj
+model.language_model.layers.7.self_attn.k_proj
+model.language_model.layers.7.self_attn.v_proj
+model.language_model.layers.7.self_attn.o_proj
+model.language_model.layers.7.mlp
+model.language_model.layers.7.mlp.gate_proj
+model.language_model.layers.7.mlp.up_proj
+model.language_model.layers.7.mlp.down_proj
+model.language_model.layers.7.mlp.act_fn
+model.language_model.layers.7.input_layernorm
+model.language_model.layers.7.post_attention_layernorm
+model.language_model.layers.8
+model.language_model.layers.8.self_attn
+model.language_model.layers.8.self_attn.q_proj
+model.language_model.layers.8.self_attn.k_proj
+model.language_model.layers.8.self_attn.v_proj
+model.language_model.layers.8.self_attn.o_proj
+model.language_model.layers.8.mlp
+model.language_model.layers.8.mlp.gate_proj
+model.language_model.layers.8.mlp.up_proj
+model.language_model.layers.8.mlp.down_proj
+model.language_model.layers.8.mlp.act_fn
+model.language_model.layers.8.input_layernorm
+model.language_model.layers.8.post_attention_layernorm
+model.language_model.layers.9
+model.language_model.layers.9.self_attn
+model.language_model.layers.9.self_attn.q_proj
+model.language_model.layers.9.self_attn.k_proj
+model.language_model.layers.9.self_attn.v_proj
+model.language_model.layers.9.self_attn.o_proj
+model.language_model.layers.9.mlp
+model.language_model.layers.9.mlp.gate_proj
+model.language_model.layers.9.mlp.up_proj
+model.language_model.layers.9.mlp.down_proj
+model.language_model.layers.9.mlp.act_fn
+model.language_model.layers.9.input_layernorm
+model.language_model.layers.9.post_attention_layernorm
+model.language_model.layers.10
+model.language_model.layers.10.self_attn
+model.language_model.layers.10.self_attn.q_proj
+model.language_model.layers.10.self_attn.k_proj
+model.language_model.layers.10.self_attn.v_proj
+model.language_model.layers.10.self_attn.o_proj
+model.language_model.layers.10.mlp
+model.language_model.layers.10.mlp.gate_proj
+model.language_model.layers.10.mlp.up_proj
+model.language_model.layers.10.mlp.down_proj
+model.language_model.layers.10.mlp.act_fn
+model.language_model.layers.10.input_layernorm
+model.language_model.layers.10.post_attention_layernorm
+model.language_model.layers.11
+model.language_model.layers.11.self_attn
+model.language_model.layers.11.self_attn.q_proj
+model.language_model.layers.11.self_attn.k_proj
+model.language_model.layers.11.self_attn.v_proj
+model.language_model.layers.11.self_attn.o_proj
+model.language_model.layers.11.mlp
+model.language_model.layers.11.mlp.gate_proj
+model.language_model.layers.11.mlp.up_proj
+model.language_model.layers.11.mlp.down_proj
+model.language_model.layers.11.mlp.act_fn
+model.language_model.layers.11.input_layernorm
+model.language_model.layers.11.post_attention_layernorm
+model.language_model.layers.12
+model.language_model.layers.12.self_attn
+model.language_model.layers.12.self_attn.q_proj
+model.language_model.layers.12.self_attn.k_proj
+model.language_model.layers.12.self_attn.v_proj
+model.language_model.layers.12.self_attn.o_proj
+model.language_model.layers.12.mlp
+model.language_model.layers.12.mlp.gate_proj
+model.language_model.layers.12.mlp.up_proj
+model.language_model.layers.12.mlp.down_proj
+model.language_model.layers.12.mlp.act_fn
+model.language_model.layers.12.input_layernorm
+model.language_model.layers.12.post_attention_layernorm
+model.language_model.layers.13
+model.language_model.layers.13.self_attn
+model.language_model.layers.13.self_attn.q_proj
+model.language_model.layers.13.self_attn.k_proj
+model.language_model.layers.13.self_attn.v_proj
+model.language_model.layers.13.self_attn.o_proj
+model.language_model.layers.13.mlp
+model.language_model.layers.13.mlp.gate_proj
+model.language_model.layers.13.mlp.up_proj
+model.language_model.layers.13.mlp.down_proj
+model.language_model.layers.13.mlp.act_fn
+model.language_model.layers.13.input_layernorm
+model.language_model.layers.13.post_attention_layernorm
+model.language_model.layers.14
+model.language_model.layers.14.self_attn
+model.language_model.layers.14.self_attn.q_proj
+model.language_model.layers.14.self_attn.k_proj
+model.language_model.layers.14.self_attn.v_proj
+model.language_model.layers.14.self_attn.o_proj
+model.language_model.layers.14.mlp
+model.language_model.layers.14.mlp.gate_proj
+model.language_model.layers.14.mlp.up_proj
+model.language_model.layers.14.mlp.down_proj
+model.language_model.layers.14.mlp.act_fn
+model.language_model.layers.14.input_layernorm
+model.language_model.layers.14.post_attention_layernorm
+model.language_model.layers.15
+model.language_model.layers.15.self_attn
+model.language_model.layers.15.self_attn.q_proj
+model.language_model.layers.15.self_attn.k_proj
+model.language_model.layers.15.self_attn.v_proj
+model.language_model.layers.15.self_attn.o_proj
+model.language_model.layers.15.mlp
+model.language_model.layers.15.mlp.gate_proj
+model.language_model.layers.15.mlp.up_proj
+model.language_model.layers.15.mlp.down_proj
+model.language_model.layers.15.mlp.act_fn
+model.language_model.layers.15.input_layernorm
+model.language_model.layers.15.post_attention_layernorm
+model.language_model.layers.16
+model.language_model.layers.16.self_attn
+model.language_model.layers.16.self_attn.q_proj
+model.language_model.layers.16.self_attn.k_proj
+model.language_model.layers.16.self_attn.v_proj
+model.language_model.layers.16.self_attn.o_proj
+model.language_model.layers.16.mlp
+model.language_model.layers.16.mlp.gate_proj
+model.language_model.layers.16.mlp.up_proj
+model.language_model.layers.16.mlp.down_proj
+model.language_model.layers.16.mlp.act_fn
+model.language_model.layers.16.input_layernorm
+model.language_model.layers.16.post_attention_layernorm
+model.language_model.layers.17
+model.language_model.layers.17.self_attn
+model.language_model.layers.17.self_attn.q_proj
+model.language_model.layers.17.self_attn.k_proj
+model.language_model.layers.17.self_attn.v_proj
+model.language_model.layers.17.self_attn.o_proj
+model.language_model.layers.17.mlp
+model.language_model.layers.17.mlp.gate_proj
+model.language_model.layers.17.mlp.up_proj
+model.language_model.layers.17.mlp.down_proj
+model.language_model.layers.17.mlp.act_fn
+model.language_model.layers.17.input_layernorm
+model.language_model.layers.17.post_attention_layernorm
+model.language_model.layers.18
+model.language_model.layers.18.self_attn
+model.language_model.layers.18.self_attn.q_proj
+model.language_model.layers.18.self_attn.k_proj
+model.language_model.layers.18.self_attn.v_proj
+model.language_model.layers.18.self_attn.o_proj
+model.language_model.layers.18.mlp
+model.language_model.layers.18.mlp.gate_proj
+model.language_model.layers.18.mlp.up_proj
+model.language_model.layers.18.mlp.down_proj
+model.language_model.layers.18.mlp.act_fn
+model.language_model.layers.18.input_layernorm
+model.language_model.layers.18.post_attention_layernorm
+model.language_model.layers.19
+model.language_model.layers.19.self_attn
+model.language_model.layers.19.self_attn.q_proj
+model.language_model.layers.19.self_attn.k_proj
+model.language_model.layers.19.self_attn.v_proj
+model.language_model.layers.19.self_attn.o_proj
+model.language_model.layers.19.mlp
+model.language_model.layers.19.mlp.gate_proj
+model.language_model.layers.19.mlp.up_proj
+model.language_model.layers.19.mlp.down_proj
+model.language_model.layers.19.mlp.act_fn
+model.language_model.layers.19.input_layernorm
+model.language_model.layers.19.post_attention_layernorm
+model.language_model.layers.20
+model.language_model.layers.20.self_attn
+model.language_model.layers.20.self_attn.q_proj
+model.language_model.layers.20.self_attn.k_proj
+model.language_model.layers.20.self_attn.v_proj
+model.language_model.layers.20.self_attn.o_proj
+model.language_model.layers.20.mlp
+model.language_model.layers.20.mlp.gate_proj
+model.language_model.layers.20.mlp.up_proj
+model.language_model.layers.20.mlp.down_proj
+model.language_model.layers.20.mlp.act_fn
+model.language_model.layers.20.input_layernorm
+model.language_model.layers.20.post_attention_layernorm
+model.language_model.layers.21
+model.language_model.layers.21.self_attn
+model.language_model.layers.21.self_attn.q_proj
+model.language_model.layers.21.self_attn.k_proj
+model.language_model.layers.21.self_attn.v_proj
+model.language_model.layers.21.self_attn.o_proj
+model.language_model.layers.21.mlp
+model.language_model.layers.21.mlp.gate_proj
+model.language_model.layers.21.mlp.up_proj
+model.language_model.layers.21.mlp.down_proj
+model.language_model.layers.21.mlp.act_fn
+model.language_model.layers.21.input_layernorm
+model.language_model.layers.21.post_attention_layernorm
+model.language_model.layers.22
+model.language_model.layers.22.self_attn
+model.language_model.layers.22.self_attn.q_proj
+model.language_model.layers.22.self_attn.k_proj
+model.language_model.layers.22.self_attn.v_proj
+model.language_model.layers.22.self_attn.o_proj
+model.language_model.layers.22.mlp
+model.language_model.layers.22.mlp.gate_proj
+model.language_model.layers.22.mlp.up_proj
+model.language_model.layers.22.mlp.down_proj
+model.language_model.layers.22.mlp.act_fn
+model.language_model.layers.22.input_layernorm
+model.language_model.layers.22.post_attention_layernorm
+model.language_model.layers.23
+model.language_model.layers.23.self_attn
+model.language_model.layers.23.self_attn.q_proj
+model.language_model.layers.23.self_attn.k_proj
+model.language_model.layers.23.self_attn.v_proj
+model.language_model.layers.23.self_attn.o_proj
+model.language_model.layers.23.mlp
+model.language_model.layers.23.mlp.gate_proj
+model.language_model.layers.23.mlp.up_proj
+model.language_model.layers.23.mlp.down_proj
+model.language_model.layers.23.mlp.act_fn
+model.language_model.layers.23.input_layernorm
+model.language_model.layers.23.post_attention_layernorm
+model.language_model.norm
+model.language_model.rotary_emb
+lm_head
diff --git a/logs/facebook/Perception-LM-1B.txt b/logs/facebook/Perception-LM-1B.txt
new file mode 100644
index 0000000000000000000000000000000000000000..19866bd6d4b645bd5d4ad93567131ff35d90033e
--- /dev/null
+++ b/logs/facebook/Perception-LM-1B.txt
@@ -0,0 +1,694 @@
+
+model
+model.vision_tower
+model.vision_tower.timm_model
+model.vision_tower.timm_model.patch_embed
+model.vision_tower.timm_model.patch_embed.proj
+model.vision_tower.timm_model.patch_embed.norm
+model.vision_tower.timm_model.pos_drop
+model.vision_tower.timm_model.rope
+model.vision_tower.timm_model.norm_pre
+model.vision_tower.timm_model.blocks
+model.vision_tower.timm_model.blocks.0
+model.vision_tower.timm_model.blocks.0.norm1
+model.vision_tower.timm_model.blocks.0.attn
+model.vision_tower.timm_model.blocks.0.attn.qkv
+model.vision_tower.timm_model.blocks.0.attn.q_norm
+model.vision_tower.timm_model.blocks.0.attn.k_norm
+model.vision_tower.timm_model.blocks.0.attn.attn_drop
+model.vision_tower.timm_model.blocks.0.attn.norm
+model.vision_tower.timm_model.blocks.0.attn.proj
+model.vision_tower.timm_model.blocks.0.attn.proj_drop
+model.vision_tower.timm_model.blocks.0.drop_path1
+model.vision_tower.timm_model.blocks.0.norm2
+model.vision_tower.timm_model.blocks.0.mlp
+model.vision_tower.timm_model.blocks.0.mlp.fc1
+model.vision_tower.timm_model.blocks.0.mlp.act
+model.vision_tower.timm_model.blocks.0.mlp.drop1
+model.vision_tower.timm_model.blocks.0.mlp.norm
+model.vision_tower.timm_model.blocks.0.mlp.fc2
+model.vision_tower.timm_model.blocks.0.mlp.drop2
+model.vision_tower.timm_model.blocks.0.drop_path2
+model.vision_tower.timm_model.blocks.1
+model.vision_tower.timm_model.blocks.1.norm1
+model.vision_tower.timm_model.blocks.1.attn
+model.vision_tower.timm_model.blocks.1.attn.qkv
+model.vision_tower.timm_model.blocks.1.attn.q_norm
+model.vision_tower.timm_model.blocks.1.attn.k_norm
+model.vision_tower.timm_model.blocks.1.attn.attn_drop
+model.vision_tower.timm_model.blocks.1.attn.norm
+model.vision_tower.timm_model.blocks.1.attn.proj
+model.vision_tower.timm_model.blocks.1.attn.proj_drop
+model.vision_tower.timm_model.blocks.1.drop_path1
+model.vision_tower.timm_model.blocks.1.norm2
+model.vision_tower.timm_model.blocks.1.mlp
+model.vision_tower.timm_model.blocks.1.mlp.fc1
+model.vision_tower.timm_model.blocks.1.mlp.act
+model.vision_tower.timm_model.blocks.1.mlp.drop1
+model.vision_tower.timm_model.blocks.1.mlp.norm
+model.vision_tower.timm_model.blocks.1.mlp.fc2
+model.vision_tower.timm_model.blocks.1.mlp.drop2
+model.vision_tower.timm_model.blocks.1.drop_path2
+model.vision_tower.timm_model.blocks.2
+model.vision_tower.timm_model.blocks.2.norm1
+model.vision_tower.timm_model.blocks.2.attn
+model.vision_tower.timm_model.blocks.2.attn.qkv
+model.vision_tower.timm_model.blocks.2.attn.q_norm
+model.vision_tower.timm_model.blocks.2.attn.k_norm
+model.vision_tower.timm_model.blocks.2.attn.attn_drop
+model.vision_tower.timm_model.blocks.2.attn.norm
+model.vision_tower.timm_model.blocks.2.attn.proj
+model.vision_tower.timm_model.blocks.2.attn.proj_drop
+model.vision_tower.timm_model.blocks.2.drop_path1
+model.vision_tower.timm_model.blocks.2.norm2
+model.vision_tower.timm_model.blocks.2.mlp
+model.vision_tower.timm_model.blocks.2.mlp.fc1
+model.vision_tower.timm_model.blocks.2.mlp.act
+model.vision_tower.timm_model.blocks.2.mlp.drop1
+model.vision_tower.timm_model.blocks.2.mlp.norm
+model.vision_tower.timm_model.blocks.2.mlp.fc2
+model.vision_tower.timm_model.blocks.2.mlp.drop2
+model.vision_tower.timm_model.blocks.2.drop_path2
+model.vision_tower.timm_model.blocks.3
+model.vision_tower.timm_model.blocks.3.norm1
+model.vision_tower.timm_model.blocks.3.attn
+model.vision_tower.timm_model.blocks.3.attn.qkv
+model.vision_tower.timm_model.blocks.3.attn.q_norm
+model.vision_tower.timm_model.blocks.3.attn.k_norm
+model.vision_tower.timm_model.blocks.3.attn.attn_drop
+model.vision_tower.timm_model.blocks.3.attn.norm
+model.vision_tower.timm_model.blocks.3.attn.proj
+model.vision_tower.timm_model.blocks.3.attn.proj_drop
+model.vision_tower.timm_model.blocks.3.drop_path1
+model.vision_tower.timm_model.blocks.3.norm2
+model.vision_tower.timm_model.blocks.3.mlp
+model.vision_tower.timm_model.blocks.3.mlp.fc1
+model.vision_tower.timm_model.blocks.3.mlp.act
+model.vision_tower.timm_model.blocks.3.mlp.drop1
+model.vision_tower.timm_model.blocks.3.mlp.norm
+model.vision_tower.timm_model.blocks.3.mlp.fc2
+model.vision_tower.timm_model.blocks.3.mlp.drop2
+model.vision_tower.timm_model.blocks.3.drop_path2
+model.vision_tower.timm_model.blocks.4
+model.vision_tower.timm_model.blocks.4.norm1
+model.vision_tower.timm_model.blocks.4.attn
+model.vision_tower.timm_model.blocks.4.attn.qkv
+model.vision_tower.timm_model.blocks.4.attn.q_norm
+model.vision_tower.timm_model.blocks.4.attn.k_norm
+model.vision_tower.timm_model.blocks.4.attn.attn_drop
+model.vision_tower.timm_model.blocks.4.attn.norm
+model.vision_tower.timm_model.blocks.4.attn.proj
+model.vision_tower.timm_model.blocks.4.attn.proj_drop
+model.vision_tower.timm_model.blocks.4.drop_path1
+model.vision_tower.timm_model.blocks.4.norm2
+model.vision_tower.timm_model.blocks.4.mlp
+model.vision_tower.timm_model.blocks.4.mlp.fc1
+model.vision_tower.timm_model.blocks.4.mlp.act
+model.vision_tower.timm_model.blocks.4.mlp.drop1
+model.vision_tower.timm_model.blocks.4.mlp.norm
+model.vision_tower.timm_model.blocks.4.mlp.fc2
+model.vision_tower.timm_model.blocks.4.mlp.drop2
+model.vision_tower.timm_model.blocks.4.drop_path2
+model.vision_tower.timm_model.blocks.5
+model.vision_tower.timm_model.blocks.5.norm1
+model.vision_tower.timm_model.blocks.5.attn
+model.vision_tower.timm_model.blocks.5.attn.qkv
+model.vision_tower.timm_model.blocks.5.attn.q_norm
+model.vision_tower.timm_model.blocks.5.attn.k_norm
+model.vision_tower.timm_model.blocks.5.attn.attn_drop
+model.vision_tower.timm_model.blocks.5.attn.norm
+model.vision_tower.timm_model.blocks.5.attn.proj
+model.vision_tower.timm_model.blocks.5.attn.proj_drop
+model.vision_tower.timm_model.blocks.5.drop_path1
+model.vision_tower.timm_model.blocks.5.norm2
+model.vision_tower.timm_model.blocks.5.mlp
+model.vision_tower.timm_model.blocks.5.mlp.fc1
+model.vision_tower.timm_model.blocks.5.mlp.act
+model.vision_tower.timm_model.blocks.5.mlp.drop1
+model.vision_tower.timm_model.blocks.5.mlp.norm
+model.vision_tower.timm_model.blocks.5.mlp.fc2
+model.vision_tower.timm_model.blocks.5.mlp.drop2
+model.vision_tower.timm_model.blocks.5.drop_path2
+model.vision_tower.timm_model.blocks.6
+model.vision_tower.timm_model.blocks.6.norm1
+model.vision_tower.timm_model.blocks.6.attn
+model.vision_tower.timm_model.blocks.6.attn.qkv
+model.vision_tower.timm_model.blocks.6.attn.q_norm
+model.vision_tower.timm_model.blocks.6.attn.k_norm
+model.vision_tower.timm_model.blocks.6.attn.attn_drop
+model.vision_tower.timm_model.blocks.6.attn.norm
+model.vision_tower.timm_model.blocks.6.attn.proj
+model.vision_tower.timm_model.blocks.6.attn.proj_drop
+model.vision_tower.timm_model.blocks.6.drop_path1
+model.vision_tower.timm_model.blocks.6.norm2
+model.vision_tower.timm_model.blocks.6.mlp
+model.vision_tower.timm_model.blocks.6.mlp.fc1
+model.vision_tower.timm_model.blocks.6.mlp.act
+model.vision_tower.timm_model.blocks.6.mlp.drop1
+model.vision_tower.timm_model.blocks.6.mlp.norm
+model.vision_tower.timm_model.blocks.6.mlp.fc2
+model.vision_tower.timm_model.blocks.6.mlp.drop2
+model.vision_tower.timm_model.blocks.6.drop_path2
+model.vision_tower.timm_model.blocks.7
+model.vision_tower.timm_model.blocks.7.norm1
+model.vision_tower.timm_model.blocks.7.attn
+model.vision_tower.timm_model.blocks.7.attn.qkv
+model.vision_tower.timm_model.blocks.7.attn.q_norm
+model.vision_tower.timm_model.blocks.7.attn.k_norm
+model.vision_tower.timm_model.blocks.7.attn.attn_drop
+model.vision_tower.timm_model.blocks.7.attn.norm
+model.vision_tower.timm_model.blocks.7.attn.proj
+model.vision_tower.timm_model.blocks.7.attn.proj_drop
+model.vision_tower.timm_model.blocks.7.drop_path1
+model.vision_tower.timm_model.blocks.7.norm2
+model.vision_tower.timm_model.blocks.7.mlp
+model.vision_tower.timm_model.blocks.7.mlp.fc1
+model.vision_tower.timm_model.blocks.7.mlp.act
+model.vision_tower.timm_model.blocks.7.mlp.drop1
+model.vision_tower.timm_model.blocks.7.mlp.norm
+model.vision_tower.timm_model.blocks.7.mlp.fc2
+model.vision_tower.timm_model.blocks.7.mlp.drop2
+model.vision_tower.timm_model.blocks.7.drop_path2
+model.vision_tower.timm_model.blocks.8
+model.vision_tower.timm_model.blocks.8.norm1
+model.vision_tower.timm_model.blocks.8.attn
+model.vision_tower.timm_model.blocks.8.attn.qkv
+model.vision_tower.timm_model.blocks.8.attn.q_norm
+model.vision_tower.timm_model.blocks.8.attn.k_norm
+model.vision_tower.timm_model.blocks.8.attn.attn_drop
+model.vision_tower.timm_model.blocks.8.attn.norm
+model.vision_tower.timm_model.blocks.8.attn.proj
+model.vision_tower.timm_model.blocks.8.attn.proj_drop
+model.vision_tower.timm_model.blocks.8.drop_path1
+model.vision_tower.timm_model.blocks.8.norm2
+model.vision_tower.timm_model.blocks.8.mlp
+model.vision_tower.timm_model.blocks.8.mlp.fc1
+model.vision_tower.timm_model.blocks.8.mlp.act
+model.vision_tower.timm_model.blocks.8.mlp.drop1
+model.vision_tower.timm_model.blocks.8.mlp.norm
+model.vision_tower.timm_model.blocks.8.mlp.fc2
+model.vision_tower.timm_model.blocks.8.mlp.drop2
+model.vision_tower.timm_model.blocks.8.drop_path2
+model.vision_tower.timm_model.blocks.9
+model.vision_tower.timm_model.blocks.9.norm1
+model.vision_tower.timm_model.blocks.9.attn
+model.vision_tower.timm_model.blocks.9.attn.qkv
+model.vision_tower.timm_model.blocks.9.attn.q_norm
+model.vision_tower.timm_model.blocks.9.attn.k_norm
+model.vision_tower.timm_model.blocks.9.attn.attn_drop
+model.vision_tower.timm_model.blocks.9.attn.norm
+model.vision_tower.timm_model.blocks.9.attn.proj
+model.vision_tower.timm_model.blocks.9.attn.proj_drop
+model.vision_tower.timm_model.blocks.9.drop_path1
+model.vision_tower.timm_model.blocks.9.norm2
+model.vision_tower.timm_model.blocks.9.mlp
+model.vision_tower.timm_model.blocks.9.mlp.fc1
+model.vision_tower.timm_model.blocks.9.mlp.act
+model.vision_tower.timm_model.blocks.9.mlp.drop1
+model.vision_tower.timm_model.blocks.9.mlp.norm
+model.vision_tower.timm_model.blocks.9.mlp.fc2
+model.vision_tower.timm_model.blocks.9.mlp.drop2
+model.vision_tower.timm_model.blocks.9.drop_path2
+model.vision_tower.timm_model.blocks.10
+model.vision_tower.timm_model.blocks.10.norm1
+model.vision_tower.timm_model.blocks.10.attn
+model.vision_tower.timm_model.blocks.10.attn.qkv
+model.vision_tower.timm_model.blocks.10.attn.q_norm
+model.vision_tower.timm_model.blocks.10.attn.k_norm
+model.vision_tower.timm_model.blocks.10.attn.attn_drop
+model.vision_tower.timm_model.blocks.10.attn.norm
+model.vision_tower.timm_model.blocks.10.attn.proj
+model.vision_tower.timm_model.blocks.10.attn.proj_drop
+model.vision_tower.timm_model.blocks.10.drop_path1
+model.vision_tower.timm_model.blocks.10.norm2
+model.vision_tower.timm_model.blocks.10.mlp
+model.vision_tower.timm_model.blocks.10.mlp.fc1
+model.vision_tower.timm_model.blocks.10.mlp.act
+model.vision_tower.timm_model.blocks.10.mlp.drop1
+model.vision_tower.timm_model.blocks.10.mlp.norm
+model.vision_tower.timm_model.blocks.10.mlp.fc2
+model.vision_tower.timm_model.blocks.10.mlp.drop2
+model.vision_tower.timm_model.blocks.10.drop_path2
+model.vision_tower.timm_model.blocks.11
+model.vision_tower.timm_model.blocks.11.norm1
+model.vision_tower.timm_model.blocks.11.attn
+model.vision_tower.timm_model.blocks.11.attn.qkv
+model.vision_tower.timm_model.blocks.11.attn.q_norm
+model.vision_tower.timm_model.blocks.11.attn.k_norm
+model.vision_tower.timm_model.blocks.11.attn.attn_drop
+model.vision_tower.timm_model.blocks.11.attn.norm
+model.vision_tower.timm_model.blocks.11.attn.proj
+model.vision_tower.timm_model.blocks.11.attn.proj_drop
+model.vision_tower.timm_model.blocks.11.drop_path1
+model.vision_tower.timm_model.blocks.11.norm2
+model.vision_tower.timm_model.blocks.11.mlp
+model.vision_tower.timm_model.blocks.11.mlp.fc1
+model.vision_tower.timm_model.blocks.11.mlp.act
+model.vision_tower.timm_model.blocks.11.mlp.drop1
+model.vision_tower.timm_model.blocks.11.mlp.norm
+model.vision_tower.timm_model.blocks.11.mlp.fc2
+model.vision_tower.timm_model.blocks.11.mlp.drop2
+model.vision_tower.timm_model.blocks.11.drop_path2
+model.vision_tower.timm_model.blocks.12
+model.vision_tower.timm_model.blocks.12.norm1
+model.vision_tower.timm_model.blocks.12.attn
+model.vision_tower.timm_model.blocks.12.attn.qkv
+model.vision_tower.timm_model.blocks.12.attn.q_norm
+model.vision_tower.timm_model.blocks.12.attn.k_norm
+model.vision_tower.timm_model.blocks.12.attn.attn_drop
+model.vision_tower.timm_model.blocks.12.attn.norm
+model.vision_tower.timm_model.blocks.12.attn.proj
+model.vision_tower.timm_model.blocks.12.attn.proj_drop
+model.vision_tower.timm_model.blocks.12.drop_path1
+model.vision_tower.timm_model.blocks.12.norm2
+model.vision_tower.timm_model.blocks.12.mlp
+model.vision_tower.timm_model.blocks.12.mlp.fc1
+model.vision_tower.timm_model.blocks.12.mlp.act
+model.vision_tower.timm_model.blocks.12.mlp.drop1
+model.vision_tower.timm_model.blocks.12.mlp.norm
+model.vision_tower.timm_model.blocks.12.mlp.fc2
+model.vision_tower.timm_model.blocks.12.mlp.drop2
+model.vision_tower.timm_model.blocks.12.drop_path2
+model.vision_tower.timm_model.blocks.13
+model.vision_tower.timm_model.blocks.13.norm1
+model.vision_tower.timm_model.blocks.13.attn
+model.vision_tower.timm_model.blocks.13.attn.qkv
+model.vision_tower.timm_model.blocks.13.attn.q_norm
+model.vision_tower.timm_model.blocks.13.attn.k_norm
+model.vision_tower.timm_model.blocks.13.attn.attn_drop
+model.vision_tower.timm_model.blocks.13.attn.norm
+model.vision_tower.timm_model.blocks.13.attn.proj
+model.vision_tower.timm_model.blocks.13.attn.proj_drop
+model.vision_tower.timm_model.blocks.13.drop_path1
+model.vision_tower.timm_model.blocks.13.norm2
+model.vision_tower.timm_model.blocks.13.mlp
+model.vision_tower.timm_model.blocks.13.mlp.fc1
+model.vision_tower.timm_model.blocks.13.mlp.act
+model.vision_tower.timm_model.blocks.13.mlp.drop1
+model.vision_tower.timm_model.blocks.13.mlp.norm
+model.vision_tower.timm_model.blocks.13.mlp.fc2
+model.vision_tower.timm_model.blocks.13.mlp.drop2
+model.vision_tower.timm_model.blocks.13.drop_path2
+model.vision_tower.timm_model.blocks.14
+model.vision_tower.timm_model.blocks.14.norm1
+model.vision_tower.timm_model.blocks.14.attn
+model.vision_tower.timm_model.blocks.14.attn.qkv
+model.vision_tower.timm_model.blocks.14.attn.q_norm
+model.vision_tower.timm_model.blocks.14.attn.k_norm
+model.vision_tower.timm_model.blocks.14.attn.attn_drop
+model.vision_tower.timm_model.blocks.14.attn.norm
+model.vision_tower.timm_model.blocks.14.attn.proj
+model.vision_tower.timm_model.blocks.14.attn.proj_drop
+model.vision_tower.timm_model.blocks.14.drop_path1
+model.vision_tower.timm_model.blocks.14.norm2
+model.vision_tower.timm_model.blocks.14.mlp
+model.vision_tower.timm_model.blocks.14.mlp.fc1
+model.vision_tower.timm_model.blocks.14.mlp.act
+model.vision_tower.timm_model.blocks.14.mlp.drop1
+model.vision_tower.timm_model.blocks.14.mlp.norm
+model.vision_tower.timm_model.blocks.14.mlp.fc2
+model.vision_tower.timm_model.blocks.14.mlp.drop2
+model.vision_tower.timm_model.blocks.14.drop_path2
+model.vision_tower.timm_model.blocks.15
+model.vision_tower.timm_model.blocks.15.norm1
+model.vision_tower.timm_model.blocks.15.attn
+model.vision_tower.timm_model.blocks.15.attn.qkv
+model.vision_tower.timm_model.blocks.15.attn.q_norm
+model.vision_tower.timm_model.blocks.15.attn.k_norm
+model.vision_tower.timm_model.blocks.15.attn.attn_drop
+model.vision_tower.timm_model.blocks.15.attn.norm
+model.vision_tower.timm_model.blocks.15.attn.proj
+model.vision_tower.timm_model.blocks.15.attn.proj_drop
+model.vision_tower.timm_model.blocks.15.drop_path1
+model.vision_tower.timm_model.blocks.15.norm2
+model.vision_tower.timm_model.blocks.15.mlp
+model.vision_tower.timm_model.blocks.15.mlp.fc1
+model.vision_tower.timm_model.blocks.15.mlp.act
+model.vision_tower.timm_model.blocks.15.mlp.drop1
+model.vision_tower.timm_model.blocks.15.mlp.norm
+model.vision_tower.timm_model.blocks.15.mlp.fc2
+model.vision_tower.timm_model.blocks.15.mlp.drop2
+model.vision_tower.timm_model.blocks.15.drop_path2
+model.vision_tower.timm_model.blocks.16
+model.vision_tower.timm_model.blocks.16.norm1
+model.vision_tower.timm_model.blocks.16.attn
+model.vision_tower.timm_model.blocks.16.attn.qkv
+model.vision_tower.timm_model.blocks.16.attn.q_norm
+model.vision_tower.timm_model.blocks.16.attn.k_norm
+model.vision_tower.timm_model.blocks.16.attn.attn_drop
+model.vision_tower.timm_model.blocks.16.attn.norm
+model.vision_tower.timm_model.blocks.16.attn.proj
+model.vision_tower.timm_model.blocks.16.attn.proj_drop
+model.vision_tower.timm_model.blocks.16.drop_path1
+model.vision_tower.timm_model.blocks.16.norm2
+model.vision_tower.timm_model.blocks.16.mlp
+model.vision_tower.timm_model.blocks.16.mlp.fc1
+model.vision_tower.timm_model.blocks.16.mlp.act
+model.vision_tower.timm_model.blocks.16.mlp.drop1
+model.vision_tower.timm_model.blocks.16.mlp.norm
+model.vision_tower.timm_model.blocks.16.mlp.fc2
+model.vision_tower.timm_model.blocks.16.mlp.drop2
+model.vision_tower.timm_model.blocks.16.drop_path2
+model.vision_tower.timm_model.blocks.17
+model.vision_tower.timm_model.blocks.17.norm1
+model.vision_tower.timm_model.blocks.17.attn
+model.vision_tower.timm_model.blocks.17.attn.qkv
+model.vision_tower.timm_model.blocks.17.attn.q_norm
+model.vision_tower.timm_model.blocks.17.attn.k_norm
+model.vision_tower.timm_model.blocks.17.attn.attn_drop
+model.vision_tower.timm_model.blocks.17.attn.norm
+model.vision_tower.timm_model.blocks.17.attn.proj
+model.vision_tower.timm_model.blocks.17.attn.proj_drop
+model.vision_tower.timm_model.blocks.17.drop_path1
+model.vision_tower.timm_model.blocks.17.norm2
+model.vision_tower.timm_model.blocks.17.mlp
+model.vision_tower.timm_model.blocks.17.mlp.fc1
+model.vision_tower.timm_model.blocks.17.mlp.act
+model.vision_tower.timm_model.blocks.17.mlp.drop1
+model.vision_tower.timm_model.blocks.17.mlp.norm
+model.vision_tower.timm_model.blocks.17.mlp.fc2
+model.vision_tower.timm_model.blocks.17.mlp.drop2
+model.vision_tower.timm_model.blocks.17.drop_path2
+model.vision_tower.timm_model.blocks.18
+model.vision_tower.timm_model.blocks.18.norm1
+model.vision_tower.timm_model.blocks.18.attn
+model.vision_tower.timm_model.blocks.18.attn.qkv
+model.vision_tower.timm_model.blocks.18.attn.q_norm
+model.vision_tower.timm_model.blocks.18.attn.k_norm
+model.vision_tower.timm_model.blocks.18.attn.attn_drop
+model.vision_tower.timm_model.blocks.18.attn.norm
+model.vision_tower.timm_model.blocks.18.attn.proj
+model.vision_tower.timm_model.blocks.18.attn.proj_drop
+model.vision_tower.timm_model.blocks.18.drop_path1
+model.vision_tower.timm_model.blocks.18.norm2
+model.vision_tower.timm_model.blocks.18.mlp
+model.vision_tower.timm_model.blocks.18.mlp.fc1
+model.vision_tower.timm_model.blocks.18.mlp.act
+model.vision_tower.timm_model.blocks.18.mlp.drop1
+model.vision_tower.timm_model.blocks.18.mlp.norm
+model.vision_tower.timm_model.blocks.18.mlp.fc2
+model.vision_tower.timm_model.blocks.18.mlp.drop2
+model.vision_tower.timm_model.blocks.18.drop_path2
+model.vision_tower.timm_model.blocks.19
+model.vision_tower.timm_model.blocks.19.norm1
+model.vision_tower.timm_model.blocks.19.attn
+model.vision_tower.timm_model.blocks.19.attn.qkv
+model.vision_tower.timm_model.blocks.19.attn.q_norm
+model.vision_tower.timm_model.blocks.19.attn.k_norm
+model.vision_tower.timm_model.blocks.19.attn.attn_drop
+model.vision_tower.timm_model.blocks.19.attn.norm
+model.vision_tower.timm_model.blocks.19.attn.proj
+model.vision_tower.timm_model.blocks.19.attn.proj_drop
+model.vision_tower.timm_model.blocks.19.drop_path1
+model.vision_tower.timm_model.blocks.19.norm2
+model.vision_tower.timm_model.blocks.19.mlp
+model.vision_tower.timm_model.blocks.19.mlp.fc1
+model.vision_tower.timm_model.blocks.19.mlp.act
+model.vision_tower.timm_model.blocks.19.mlp.drop1
+model.vision_tower.timm_model.blocks.19.mlp.norm
+model.vision_tower.timm_model.blocks.19.mlp.fc2
+model.vision_tower.timm_model.blocks.19.mlp.drop2
+model.vision_tower.timm_model.blocks.19.drop_path2
+model.vision_tower.timm_model.blocks.20
+model.vision_tower.timm_model.blocks.20.norm1
+model.vision_tower.timm_model.blocks.20.attn
+model.vision_tower.timm_model.blocks.20.attn.qkv
+model.vision_tower.timm_model.blocks.20.attn.q_norm
+model.vision_tower.timm_model.blocks.20.attn.k_norm
+model.vision_tower.timm_model.blocks.20.attn.attn_drop
+model.vision_tower.timm_model.blocks.20.attn.norm
+model.vision_tower.timm_model.blocks.20.attn.proj
+model.vision_tower.timm_model.blocks.20.attn.proj_drop
+model.vision_tower.timm_model.blocks.20.drop_path1
+model.vision_tower.timm_model.blocks.20.norm2
+model.vision_tower.timm_model.blocks.20.mlp
+model.vision_tower.timm_model.blocks.20.mlp.fc1
+model.vision_tower.timm_model.blocks.20.mlp.act
+model.vision_tower.timm_model.blocks.20.mlp.drop1
+model.vision_tower.timm_model.blocks.20.mlp.norm
+model.vision_tower.timm_model.blocks.20.mlp.fc2
+model.vision_tower.timm_model.blocks.20.mlp.drop2
+model.vision_tower.timm_model.blocks.20.drop_path2
+model.vision_tower.timm_model.blocks.21
+model.vision_tower.timm_model.blocks.21.norm1
+model.vision_tower.timm_model.blocks.21.attn
+model.vision_tower.timm_model.blocks.21.attn.qkv
+model.vision_tower.timm_model.blocks.21.attn.q_norm
+model.vision_tower.timm_model.blocks.21.attn.k_norm
+model.vision_tower.timm_model.blocks.21.attn.attn_drop
+model.vision_tower.timm_model.blocks.21.attn.norm
+model.vision_tower.timm_model.blocks.21.attn.proj
+model.vision_tower.timm_model.blocks.21.attn.proj_drop
+model.vision_tower.timm_model.blocks.21.drop_path1
+model.vision_tower.timm_model.blocks.21.norm2
+model.vision_tower.timm_model.blocks.21.mlp
+model.vision_tower.timm_model.blocks.21.mlp.fc1
+model.vision_tower.timm_model.blocks.21.mlp.act
+model.vision_tower.timm_model.blocks.21.mlp.drop1
+model.vision_tower.timm_model.blocks.21.mlp.norm
+model.vision_tower.timm_model.blocks.21.mlp.fc2
+model.vision_tower.timm_model.blocks.21.mlp.drop2
+model.vision_tower.timm_model.blocks.21.drop_path2
+model.vision_tower.timm_model.blocks.22
+model.vision_tower.timm_model.blocks.22.norm1
+model.vision_tower.timm_model.blocks.22.attn
+model.vision_tower.timm_model.blocks.22.attn.qkv
+model.vision_tower.timm_model.blocks.22.attn.q_norm
+model.vision_tower.timm_model.blocks.22.attn.k_norm
+model.vision_tower.timm_model.blocks.22.attn.attn_drop
+model.vision_tower.timm_model.blocks.22.attn.norm
+model.vision_tower.timm_model.blocks.22.attn.proj
+model.vision_tower.timm_model.blocks.22.attn.proj_drop
+model.vision_tower.timm_model.blocks.22.drop_path1
+model.vision_tower.timm_model.blocks.22.norm2
+model.vision_tower.timm_model.blocks.22.mlp
+model.vision_tower.timm_model.blocks.22.mlp.fc1
+model.vision_tower.timm_model.blocks.22.mlp.act
+model.vision_tower.timm_model.blocks.22.mlp.drop1
+model.vision_tower.timm_model.blocks.22.mlp.norm
+model.vision_tower.timm_model.blocks.22.mlp.fc2
+model.vision_tower.timm_model.blocks.22.mlp.drop2
+model.vision_tower.timm_model.blocks.22.drop_path2
+model.vision_tower.timm_model.norm
+model.vision_tower.timm_model.fc_norm
+model.vision_tower.timm_model.head_drop
+model.vision_tower.timm_model.head
+model.multi_modal_projector
+model.multi_modal_projector.linear_1
+model.multi_modal_projector.gelu
+model.multi_modal_projector.linear_2
+model.multi_modal_projector.pooling
+model.language_model
+model.language_model.embed_tokens
+model.language_model.layers
+model.language_model.layers.0
+model.language_model.layers.0.self_attn
+model.language_model.layers.0.self_attn.q_proj
+model.language_model.layers.0.self_attn.k_proj
+model.language_model.layers.0.self_attn.v_proj
+model.language_model.layers.0.self_attn.o_proj
+model.language_model.layers.0.mlp
+model.language_model.layers.0.mlp.gate_proj
+model.language_model.layers.0.mlp.up_proj
+model.language_model.layers.0.mlp.down_proj
+model.language_model.layers.0.mlp.act_fn
+model.language_model.layers.0.input_layernorm
+model.language_model.layers.0.post_attention_layernorm
+model.language_model.layers.1
+model.language_model.layers.1.self_attn
+model.language_model.layers.1.self_attn.q_proj
+model.language_model.layers.1.self_attn.k_proj
+model.language_model.layers.1.self_attn.v_proj
+model.language_model.layers.1.self_attn.o_proj
+model.language_model.layers.1.mlp
+model.language_model.layers.1.mlp.gate_proj
+model.language_model.layers.1.mlp.up_proj
+model.language_model.layers.1.mlp.down_proj
+model.language_model.layers.1.mlp.act_fn
+model.language_model.layers.1.input_layernorm
+model.language_model.layers.1.post_attention_layernorm
+model.language_model.layers.2
+model.language_model.layers.2.self_attn
+model.language_model.layers.2.self_attn.q_proj
+model.language_model.layers.2.self_attn.k_proj
+model.language_model.layers.2.self_attn.v_proj
+model.language_model.layers.2.self_attn.o_proj
+model.language_model.layers.2.mlp
+model.language_model.layers.2.mlp.gate_proj
+model.language_model.layers.2.mlp.up_proj
+model.language_model.layers.2.mlp.down_proj
+model.language_model.layers.2.mlp.act_fn
+model.language_model.layers.2.input_layernorm
+model.language_model.layers.2.post_attention_layernorm
+model.language_model.layers.3
+model.language_model.layers.3.self_attn
+model.language_model.layers.3.self_attn.q_proj
+model.language_model.layers.3.self_attn.k_proj
+model.language_model.layers.3.self_attn.v_proj
+model.language_model.layers.3.self_attn.o_proj
+model.language_model.layers.3.mlp
+model.language_model.layers.3.mlp.gate_proj
+model.language_model.layers.3.mlp.up_proj
+model.language_model.layers.3.mlp.down_proj
+model.language_model.layers.3.mlp.act_fn
+model.language_model.layers.3.input_layernorm
+model.language_model.layers.3.post_attention_layernorm
+model.language_model.layers.4
+model.language_model.layers.4.self_attn
+model.language_model.layers.4.self_attn.q_proj
+model.language_model.layers.4.self_attn.k_proj
+model.language_model.layers.4.self_attn.v_proj
+model.language_model.layers.4.self_attn.o_proj
+model.language_model.layers.4.mlp
+model.language_model.layers.4.mlp.gate_proj
+model.language_model.layers.4.mlp.up_proj
+model.language_model.layers.4.mlp.down_proj
+model.language_model.layers.4.mlp.act_fn
+model.language_model.layers.4.input_layernorm
+model.language_model.layers.4.post_attention_layernorm
+model.language_model.layers.5
+model.language_model.layers.5.self_attn
+model.language_model.layers.5.self_attn.q_proj
+model.language_model.layers.5.self_attn.k_proj
+model.language_model.layers.5.self_attn.v_proj
+model.language_model.layers.5.self_attn.o_proj
+model.language_model.layers.5.mlp
+model.language_model.layers.5.mlp.gate_proj
+model.language_model.layers.5.mlp.up_proj
+model.language_model.layers.5.mlp.down_proj
+model.language_model.layers.5.mlp.act_fn
+model.language_model.layers.5.input_layernorm
+model.language_model.layers.5.post_attention_layernorm
+model.language_model.layers.6
+model.language_model.layers.6.self_attn
+model.language_model.layers.6.self_attn.q_proj
+model.language_model.layers.6.self_attn.k_proj
+model.language_model.layers.6.self_attn.v_proj
+model.language_model.layers.6.self_attn.o_proj
+model.language_model.layers.6.mlp
+model.language_model.layers.6.mlp.gate_proj
+model.language_model.layers.6.mlp.up_proj
+model.language_model.layers.6.mlp.down_proj
+model.language_model.layers.6.mlp.act_fn
+model.language_model.layers.6.input_layernorm
+model.language_model.layers.6.post_attention_layernorm
+model.language_model.layers.7
+model.language_model.layers.7.self_attn
+model.language_model.layers.7.self_attn.q_proj
+model.language_model.layers.7.self_attn.k_proj
+model.language_model.layers.7.self_attn.v_proj
+model.language_model.layers.7.self_attn.o_proj
+model.language_model.layers.7.mlp
+model.language_model.layers.7.mlp.gate_proj
+model.language_model.layers.7.mlp.up_proj
+model.language_model.layers.7.mlp.down_proj
+model.language_model.layers.7.mlp.act_fn
+model.language_model.layers.7.input_layernorm
+model.language_model.layers.7.post_attention_layernorm
+model.language_model.layers.8
+model.language_model.layers.8.self_attn
+model.language_model.layers.8.self_attn.q_proj
+model.language_model.layers.8.self_attn.k_proj
+model.language_model.layers.8.self_attn.v_proj
+model.language_model.layers.8.self_attn.o_proj
+model.language_model.layers.8.mlp
+model.language_model.layers.8.mlp.gate_proj
+model.language_model.layers.8.mlp.up_proj
+model.language_model.layers.8.mlp.down_proj
+model.language_model.layers.8.mlp.act_fn
+model.language_model.layers.8.input_layernorm
+model.language_model.layers.8.post_attention_layernorm
+model.language_model.layers.9
+model.language_model.layers.9.self_attn
+model.language_model.layers.9.self_attn.q_proj
+model.language_model.layers.9.self_attn.k_proj
+model.language_model.layers.9.self_attn.v_proj
+model.language_model.layers.9.self_attn.o_proj
+model.language_model.layers.9.mlp
+model.language_model.layers.9.mlp.gate_proj
+model.language_model.layers.9.mlp.up_proj
+model.language_model.layers.9.mlp.down_proj
+model.language_model.layers.9.mlp.act_fn
+model.language_model.layers.9.input_layernorm
+model.language_model.layers.9.post_attention_layernorm
+model.language_model.layers.10
+model.language_model.layers.10.self_attn
+model.language_model.layers.10.self_attn.q_proj
+model.language_model.layers.10.self_attn.k_proj
+model.language_model.layers.10.self_attn.v_proj
+model.language_model.layers.10.self_attn.o_proj
+model.language_model.layers.10.mlp
+model.language_model.layers.10.mlp.gate_proj
+model.language_model.layers.10.mlp.up_proj
+model.language_model.layers.10.mlp.down_proj
+model.language_model.layers.10.mlp.act_fn
+model.language_model.layers.10.input_layernorm
+model.language_model.layers.10.post_attention_layernorm
+model.language_model.layers.11
+model.language_model.layers.11.self_attn
+model.language_model.layers.11.self_attn.q_proj
+model.language_model.layers.11.self_attn.k_proj
+model.language_model.layers.11.self_attn.v_proj
+model.language_model.layers.11.self_attn.o_proj
+model.language_model.layers.11.mlp
+model.language_model.layers.11.mlp.gate_proj
+model.language_model.layers.11.mlp.up_proj
+model.language_model.layers.11.mlp.down_proj
+model.language_model.layers.11.mlp.act_fn
+model.language_model.layers.11.input_layernorm
+model.language_model.layers.11.post_attention_layernorm
+model.language_model.layers.12
+model.language_model.layers.12.self_attn
+model.language_model.layers.12.self_attn.q_proj
+model.language_model.layers.12.self_attn.k_proj
+model.language_model.layers.12.self_attn.v_proj
+model.language_model.layers.12.self_attn.o_proj
+model.language_model.layers.12.mlp
+model.language_model.layers.12.mlp.gate_proj
+model.language_model.layers.12.mlp.up_proj
+model.language_model.layers.12.mlp.down_proj
+model.language_model.layers.12.mlp.act_fn
+model.language_model.layers.12.input_layernorm
+model.language_model.layers.12.post_attention_layernorm
+model.language_model.layers.13
+model.language_model.layers.13.self_attn
+model.language_model.layers.13.self_attn.q_proj
+model.language_model.layers.13.self_attn.k_proj
+model.language_model.layers.13.self_attn.v_proj
+model.language_model.layers.13.self_attn.o_proj
+model.language_model.layers.13.mlp
+model.language_model.layers.13.mlp.gate_proj
+model.language_model.layers.13.mlp.up_proj
+model.language_model.layers.13.mlp.down_proj
+model.language_model.layers.13.mlp.act_fn
+model.language_model.layers.13.input_layernorm
+model.language_model.layers.13.post_attention_layernorm
+model.language_model.layers.14
+model.language_model.layers.14.self_attn
+model.language_model.layers.14.self_attn.q_proj
+model.language_model.layers.14.self_attn.k_proj
+model.language_model.layers.14.self_attn.v_proj
+model.language_model.layers.14.self_attn.o_proj
+model.language_model.layers.14.mlp
+model.language_model.layers.14.mlp.gate_proj
+model.language_model.layers.14.mlp.up_proj
+model.language_model.layers.14.mlp.down_proj
+model.language_model.layers.14.mlp.act_fn
+model.language_model.layers.14.input_layernorm
+model.language_model.layers.14.post_attention_layernorm
+model.language_model.layers.15
+model.language_model.layers.15.self_attn
+model.language_model.layers.15.self_attn.q_proj
+model.language_model.layers.15.self_attn.k_proj
+model.language_model.layers.15.self_attn.v_proj
+model.language_model.layers.15.self_attn.o_proj
+model.language_model.layers.15.mlp
+model.language_model.layers.15.mlp.gate_proj
+model.language_model.layers.15.mlp.up_proj
+model.language_model.layers.15.mlp.down_proj
+model.language_model.layers.15.mlp.act_fn
+model.language_model.layers.15.input_layernorm
+model.language_model.layers.15.post_attention_layernorm
+model.language_model.norm
+model.language_model.rotary_emb
+lm_head
diff --git a/logs/internlm/internlm-xcomposer2d5-7b.txt b/logs/internlm/internlm-xcomposer2d5-7b.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5aaffd13b5eecd3345f2d9edede652a28bbeb2db
--- /dev/null
+++ b/logs/internlm/internlm-xcomposer2d5-7b.txt
@@ -0,0 +1,2132 @@
+
+model
+model.tok_embeddings
+model.layers
+model.layers.0
+model.layers.0.attention
+model.layers.0.attention.wqkv
+model.layers.0.attention.wqkv.lora_dropout
+model.layers.0.attention.wqkv.Plora_A
+model.layers.0.attention.wqkv.Plora_B
+model.layers.0.attention.wqkv.lora_sft_A
+model.layers.0.attention.wqkv.lora_sft_B
+model.layers.0.attention.wqkv.lora_dpo_A
+model.layers.0.attention.wqkv.lora_dpo_B
+model.layers.0.attention.wqkv.lora_web_A
+model.layers.0.attention.wqkv.lora_web_B
+model.layers.0.attention.wo
+model.layers.0.attention.wo.lora_dropout
+model.layers.0.attention.wo.Plora_A
+model.layers.0.attention.wo.Plora_B
+model.layers.0.attention.wo.lora_sft_A
+model.layers.0.attention.wo.lora_sft_B
+model.layers.0.attention.wo.lora_dpo_A
+model.layers.0.attention.wo.lora_dpo_B
+model.layers.0.attention.wo.lora_web_A
+model.layers.0.attention.wo.lora_web_B
+model.layers.0.attention.rotary_emb
+model.layers.0.feed_forward
+model.layers.0.feed_forward.w1
+model.layers.0.feed_forward.w1.lora_dropout
+model.layers.0.feed_forward.w1.Plora_A
+model.layers.0.feed_forward.w1.Plora_B
+model.layers.0.feed_forward.w1.lora_sft_A
+model.layers.0.feed_forward.w1.lora_sft_B
+model.layers.0.feed_forward.w1.lora_dpo_A
+model.layers.0.feed_forward.w1.lora_dpo_B
+model.layers.0.feed_forward.w1.lora_web_A
+model.layers.0.feed_forward.w1.lora_web_B
+model.layers.0.feed_forward.w3
+model.layers.0.feed_forward.w3.lora_dropout
+model.layers.0.feed_forward.w3.Plora_A
+model.layers.0.feed_forward.w3.Plora_B
+model.layers.0.feed_forward.w3.lora_sft_A
+model.layers.0.feed_forward.w3.lora_sft_B
+model.layers.0.feed_forward.w3.lora_dpo_A
+model.layers.0.feed_forward.w3.lora_dpo_B
+model.layers.0.feed_forward.w3.lora_web_A
+model.layers.0.feed_forward.w3.lora_web_B
+model.layers.0.feed_forward.w2
+model.layers.0.feed_forward.w2.lora_dropout
+model.layers.0.feed_forward.w2.Plora_A
+model.layers.0.feed_forward.w2.Plora_B
+model.layers.0.feed_forward.w2.lora_sft_A
+model.layers.0.feed_forward.w2.lora_sft_B
+model.layers.0.feed_forward.w2.lora_dpo_A
+model.layers.0.feed_forward.w2.lora_dpo_B
+model.layers.0.feed_forward.w2.lora_web_A
+model.layers.0.feed_forward.w2.lora_web_B
+model.layers.0.feed_forward.act_fn
+model.layers.0.attention_norm
+model.layers.0.ffn_norm
+model.layers.1
+model.layers.1.attention
+model.layers.1.attention.wqkv
+model.layers.1.attention.wqkv.lora_dropout
+model.layers.1.attention.wqkv.Plora_A
+model.layers.1.attention.wqkv.Plora_B
+model.layers.1.attention.wqkv.lora_sft_A
+model.layers.1.attention.wqkv.lora_sft_B
+model.layers.1.attention.wqkv.lora_dpo_A
+model.layers.1.attention.wqkv.lora_dpo_B
+model.layers.1.attention.wqkv.lora_web_A
+model.layers.1.attention.wqkv.lora_web_B
+model.layers.1.attention.wo
+model.layers.1.attention.wo.lora_dropout
+model.layers.1.attention.wo.Plora_A
+model.layers.1.attention.wo.Plora_B
+model.layers.1.attention.wo.lora_sft_A
+model.layers.1.attention.wo.lora_sft_B
+model.layers.1.attention.wo.lora_dpo_A
+model.layers.1.attention.wo.lora_dpo_B
+model.layers.1.attention.wo.lora_web_A
+model.layers.1.attention.wo.lora_web_B
+model.layers.1.attention.rotary_emb
+model.layers.1.feed_forward
+model.layers.1.feed_forward.w1
+model.layers.1.feed_forward.w1.lora_dropout
+model.layers.1.feed_forward.w1.Plora_A
+model.layers.1.feed_forward.w1.Plora_B
+model.layers.1.feed_forward.w1.lora_sft_A
+model.layers.1.feed_forward.w1.lora_sft_B
+model.layers.1.feed_forward.w1.lora_dpo_A
+model.layers.1.feed_forward.w1.lora_dpo_B
+model.layers.1.feed_forward.w1.lora_web_A
+model.layers.1.feed_forward.w1.lora_web_B
+model.layers.1.feed_forward.w3
+model.layers.1.feed_forward.w3.lora_dropout
+model.layers.1.feed_forward.w3.Plora_A
+model.layers.1.feed_forward.w3.Plora_B
+model.layers.1.feed_forward.w3.lora_sft_A
+model.layers.1.feed_forward.w3.lora_sft_B
+model.layers.1.feed_forward.w3.lora_dpo_A
+model.layers.1.feed_forward.w3.lora_dpo_B
+model.layers.1.feed_forward.w3.lora_web_A
+model.layers.1.feed_forward.w3.lora_web_B
+model.layers.1.feed_forward.w2
+model.layers.1.feed_forward.w2.lora_dropout
+model.layers.1.feed_forward.w2.Plora_A
+model.layers.1.feed_forward.w2.Plora_B
+model.layers.1.feed_forward.w2.lora_sft_A
+model.layers.1.feed_forward.w2.lora_sft_B
+model.layers.1.feed_forward.w2.lora_dpo_A
+model.layers.1.feed_forward.w2.lora_dpo_B
+model.layers.1.feed_forward.w2.lora_web_A
+model.layers.1.feed_forward.w2.lora_web_B
+model.layers.1.feed_forward.act_fn
+model.layers.1.attention_norm
+model.layers.1.ffn_norm
+model.layers.2
+model.layers.2.attention
+model.layers.2.attention.wqkv
+model.layers.2.attention.wqkv.lora_dropout
+model.layers.2.attention.wqkv.Plora_A
+model.layers.2.attention.wqkv.Plora_B
+model.layers.2.attention.wqkv.lora_sft_A
+model.layers.2.attention.wqkv.lora_sft_B
+model.layers.2.attention.wqkv.lora_dpo_A
+model.layers.2.attention.wqkv.lora_dpo_B
+model.layers.2.attention.wqkv.lora_web_A
+model.layers.2.attention.wqkv.lora_web_B
+model.layers.2.attention.wo
+model.layers.2.attention.wo.lora_dropout
+model.layers.2.attention.wo.Plora_A
+model.layers.2.attention.wo.Plora_B
+model.layers.2.attention.wo.lora_sft_A
+model.layers.2.attention.wo.lora_sft_B
+model.layers.2.attention.wo.lora_dpo_A
+model.layers.2.attention.wo.lora_dpo_B
+model.layers.2.attention.wo.lora_web_A
+model.layers.2.attention.wo.lora_web_B
+model.layers.2.attention.rotary_emb
+model.layers.2.feed_forward
+model.layers.2.feed_forward.w1
+model.layers.2.feed_forward.w1.lora_dropout
+model.layers.2.feed_forward.w1.Plora_A
+model.layers.2.feed_forward.w1.Plora_B
+model.layers.2.feed_forward.w1.lora_sft_A
+model.layers.2.feed_forward.w1.lora_sft_B
+model.layers.2.feed_forward.w1.lora_dpo_A
+model.layers.2.feed_forward.w1.lora_dpo_B
+model.layers.2.feed_forward.w1.lora_web_A
+model.layers.2.feed_forward.w1.lora_web_B
+model.layers.2.feed_forward.w3
+model.layers.2.feed_forward.w3.lora_dropout
+model.layers.2.feed_forward.w3.Plora_A
+model.layers.2.feed_forward.w3.Plora_B
+model.layers.2.feed_forward.w3.lora_sft_A
+model.layers.2.feed_forward.w3.lora_sft_B
+model.layers.2.feed_forward.w3.lora_dpo_A
+model.layers.2.feed_forward.w3.lora_dpo_B
+model.layers.2.feed_forward.w3.lora_web_A
+model.layers.2.feed_forward.w3.lora_web_B
+model.layers.2.feed_forward.w2
+model.layers.2.feed_forward.w2.lora_dropout
+model.layers.2.feed_forward.w2.Plora_A
+model.layers.2.feed_forward.w2.Plora_B
+model.layers.2.feed_forward.w2.lora_sft_A
+model.layers.2.feed_forward.w2.lora_sft_B
+model.layers.2.feed_forward.w2.lora_dpo_A
+model.layers.2.feed_forward.w2.lora_dpo_B
+model.layers.2.feed_forward.w2.lora_web_A
+model.layers.2.feed_forward.w2.lora_web_B
+model.layers.2.feed_forward.act_fn
+model.layers.2.attention_norm
+model.layers.2.ffn_norm
+model.layers.3
+model.layers.3.attention
+model.layers.3.attention.wqkv
+model.layers.3.attention.wqkv.lora_dropout
+model.layers.3.attention.wqkv.Plora_A
+model.layers.3.attention.wqkv.Plora_B
+model.layers.3.attention.wqkv.lora_sft_A
+model.layers.3.attention.wqkv.lora_sft_B
+model.layers.3.attention.wqkv.lora_dpo_A
+model.layers.3.attention.wqkv.lora_dpo_B
+model.layers.3.attention.wqkv.lora_web_A
+model.layers.3.attention.wqkv.lora_web_B
+model.layers.3.attention.wo
+model.layers.3.attention.wo.lora_dropout
+model.layers.3.attention.wo.Plora_A
+model.layers.3.attention.wo.Plora_B
+model.layers.3.attention.wo.lora_sft_A
+model.layers.3.attention.wo.lora_sft_B
+model.layers.3.attention.wo.lora_dpo_A
+model.layers.3.attention.wo.lora_dpo_B
+model.layers.3.attention.wo.lora_web_A
+model.layers.3.attention.wo.lora_web_B
+model.layers.3.attention.rotary_emb
+model.layers.3.feed_forward
+model.layers.3.feed_forward.w1
+model.layers.3.feed_forward.w1.lora_dropout
+model.layers.3.feed_forward.w1.Plora_A
+model.layers.3.feed_forward.w1.Plora_B
+model.layers.3.feed_forward.w1.lora_sft_A
+model.layers.3.feed_forward.w1.lora_sft_B
+model.layers.3.feed_forward.w1.lora_dpo_A
+model.layers.3.feed_forward.w1.lora_dpo_B
+model.layers.3.feed_forward.w1.lora_web_A
+model.layers.3.feed_forward.w1.lora_web_B
+model.layers.3.feed_forward.w3
+model.layers.3.feed_forward.w3.lora_dropout
+model.layers.3.feed_forward.w3.Plora_A
+model.layers.3.feed_forward.w3.Plora_B
+model.layers.3.feed_forward.w3.lora_sft_A
+model.layers.3.feed_forward.w3.lora_sft_B
+model.layers.3.feed_forward.w3.lora_dpo_A
+model.layers.3.feed_forward.w3.lora_dpo_B
+model.layers.3.feed_forward.w3.lora_web_A
+model.layers.3.feed_forward.w3.lora_web_B
+model.layers.3.feed_forward.w2
+model.layers.3.feed_forward.w2.lora_dropout
+model.layers.3.feed_forward.w2.Plora_A
+model.layers.3.feed_forward.w2.Plora_B
+model.layers.3.feed_forward.w2.lora_sft_A
+model.layers.3.feed_forward.w2.lora_sft_B
+model.layers.3.feed_forward.w2.lora_dpo_A
+model.layers.3.feed_forward.w2.lora_dpo_B
+model.layers.3.feed_forward.w2.lora_web_A
+model.layers.3.feed_forward.w2.lora_web_B
+model.layers.3.feed_forward.act_fn
+model.layers.3.attention_norm
+model.layers.3.ffn_norm
+model.layers.4
+model.layers.4.attention
+model.layers.4.attention.wqkv
+model.layers.4.attention.wqkv.lora_dropout
+model.layers.4.attention.wqkv.Plora_A
+model.layers.4.attention.wqkv.Plora_B
+model.layers.4.attention.wqkv.lora_sft_A
+model.layers.4.attention.wqkv.lora_sft_B
+model.layers.4.attention.wqkv.lora_dpo_A
+model.layers.4.attention.wqkv.lora_dpo_B
+model.layers.4.attention.wqkv.lora_web_A
+model.layers.4.attention.wqkv.lora_web_B
+model.layers.4.attention.wo
+model.layers.4.attention.wo.lora_dropout
+model.layers.4.attention.wo.Plora_A
+model.layers.4.attention.wo.Plora_B
+model.layers.4.attention.wo.lora_sft_A
+model.layers.4.attention.wo.lora_sft_B
+model.layers.4.attention.wo.lora_dpo_A
+model.layers.4.attention.wo.lora_dpo_B
+model.layers.4.attention.wo.lora_web_A
+model.layers.4.attention.wo.lora_web_B
+model.layers.4.attention.rotary_emb
+model.layers.4.feed_forward
+model.layers.4.feed_forward.w1
+model.layers.4.feed_forward.w1.lora_dropout
+model.layers.4.feed_forward.w1.Plora_A
+model.layers.4.feed_forward.w1.Plora_B
+model.layers.4.feed_forward.w1.lora_sft_A
+model.layers.4.feed_forward.w1.lora_sft_B
+model.layers.4.feed_forward.w1.lora_dpo_A
+model.layers.4.feed_forward.w1.lora_dpo_B
+model.layers.4.feed_forward.w1.lora_web_A
+model.layers.4.feed_forward.w1.lora_web_B
+model.layers.4.feed_forward.w3
+model.layers.4.feed_forward.w3.lora_dropout
+model.layers.4.feed_forward.w3.Plora_A
+model.layers.4.feed_forward.w3.Plora_B
+model.layers.4.feed_forward.w3.lora_sft_A
+model.layers.4.feed_forward.w3.lora_sft_B
+model.layers.4.feed_forward.w3.lora_dpo_A
+model.layers.4.feed_forward.w3.lora_dpo_B
+model.layers.4.feed_forward.w3.lora_web_A
+model.layers.4.feed_forward.w3.lora_web_B
+model.layers.4.feed_forward.w2
+model.layers.4.feed_forward.w2.lora_dropout
+model.layers.4.feed_forward.w2.Plora_A
+model.layers.4.feed_forward.w2.Plora_B
+model.layers.4.feed_forward.w2.lora_sft_A
+model.layers.4.feed_forward.w2.lora_sft_B
+model.layers.4.feed_forward.w2.lora_dpo_A
+model.layers.4.feed_forward.w2.lora_dpo_B
+model.layers.4.feed_forward.w2.lora_web_A
+model.layers.4.feed_forward.w2.lora_web_B
+model.layers.4.feed_forward.act_fn
+model.layers.4.attention_norm
+model.layers.4.ffn_norm
+model.layers.5
+model.layers.5.attention
+model.layers.5.attention.wqkv
+model.layers.5.attention.wqkv.lora_dropout
+model.layers.5.attention.wqkv.Plora_A
+model.layers.5.attention.wqkv.Plora_B
+model.layers.5.attention.wqkv.lora_sft_A
+model.layers.5.attention.wqkv.lora_sft_B
+model.layers.5.attention.wqkv.lora_dpo_A
+model.layers.5.attention.wqkv.lora_dpo_B
+model.layers.5.attention.wqkv.lora_web_A
+model.layers.5.attention.wqkv.lora_web_B
+model.layers.5.attention.wo
+model.layers.5.attention.wo.lora_dropout
+model.layers.5.attention.wo.Plora_A
+model.layers.5.attention.wo.Plora_B
+model.layers.5.attention.wo.lora_sft_A
+model.layers.5.attention.wo.lora_sft_B
+model.layers.5.attention.wo.lora_dpo_A
+model.layers.5.attention.wo.lora_dpo_B
+model.layers.5.attention.wo.lora_web_A
+model.layers.5.attention.wo.lora_web_B
+model.layers.5.attention.rotary_emb
+model.layers.5.feed_forward
+model.layers.5.feed_forward.w1
+model.layers.5.feed_forward.w1.lora_dropout
+model.layers.5.feed_forward.w1.Plora_A
+model.layers.5.feed_forward.w1.Plora_B
+model.layers.5.feed_forward.w1.lora_sft_A
+model.layers.5.feed_forward.w1.lora_sft_B
+model.layers.5.feed_forward.w1.lora_dpo_A
+model.layers.5.feed_forward.w1.lora_dpo_B
+model.layers.5.feed_forward.w1.lora_web_A
+model.layers.5.feed_forward.w1.lora_web_B
+model.layers.5.feed_forward.w3
+model.layers.5.feed_forward.w3.lora_dropout
+model.layers.5.feed_forward.w3.Plora_A
+model.layers.5.feed_forward.w3.Plora_B
+model.layers.5.feed_forward.w3.lora_sft_A
+model.layers.5.feed_forward.w3.lora_sft_B
+model.layers.5.feed_forward.w3.lora_dpo_A
+model.layers.5.feed_forward.w3.lora_dpo_B
+model.layers.5.feed_forward.w3.lora_web_A
+model.layers.5.feed_forward.w3.lora_web_B
+model.layers.5.feed_forward.w2
+model.layers.5.feed_forward.w2.lora_dropout
+model.layers.5.feed_forward.w2.Plora_A
+model.layers.5.feed_forward.w2.Plora_B
+model.layers.5.feed_forward.w2.lora_sft_A
+model.layers.5.feed_forward.w2.lora_sft_B
+model.layers.5.feed_forward.w2.lora_dpo_A
+model.layers.5.feed_forward.w2.lora_dpo_B
+model.layers.5.feed_forward.w2.lora_web_A
+model.layers.5.feed_forward.w2.lora_web_B
+model.layers.5.feed_forward.act_fn
+model.layers.5.attention_norm
+model.layers.5.ffn_norm
+model.layers.6
+model.layers.6.attention
+model.layers.6.attention.wqkv
+model.layers.6.attention.wqkv.lora_dropout
+model.layers.6.attention.wqkv.Plora_A
+model.layers.6.attention.wqkv.Plora_B
+model.layers.6.attention.wqkv.lora_sft_A
+model.layers.6.attention.wqkv.lora_sft_B
+model.layers.6.attention.wqkv.lora_dpo_A
+model.layers.6.attention.wqkv.lora_dpo_B
+model.layers.6.attention.wqkv.lora_web_A
+model.layers.6.attention.wqkv.lora_web_B
+model.layers.6.attention.wo
+model.layers.6.attention.wo.lora_dropout
+model.layers.6.attention.wo.Plora_A
+model.layers.6.attention.wo.Plora_B
+model.layers.6.attention.wo.lora_sft_A
+model.layers.6.attention.wo.lora_sft_B
+model.layers.6.attention.wo.lora_dpo_A
+model.layers.6.attention.wo.lora_dpo_B
+model.layers.6.attention.wo.lora_web_A
+model.layers.6.attention.wo.lora_web_B
+model.layers.6.attention.rotary_emb
+model.layers.6.feed_forward
+model.layers.6.feed_forward.w1
+model.layers.6.feed_forward.w1.lora_dropout
+model.layers.6.feed_forward.w1.Plora_A
+model.layers.6.feed_forward.w1.Plora_B
+model.layers.6.feed_forward.w1.lora_sft_A
+model.layers.6.feed_forward.w1.lora_sft_B
+model.layers.6.feed_forward.w1.lora_dpo_A
+model.layers.6.feed_forward.w1.lora_dpo_B
+model.layers.6.feed_forward.w1.lora_web_A
+model.layers.6.feed_forward.w1.lora_web_B
+model.layers.6.feed_forward.w3
+model.layers.6.feed_forward.w3.lora_dropout
+model.layers.6.feed_forward.w3.Plora_A
+model.layers.6.feed_forward.w3.Plora_B
+model.layers.6.feed_forward.w3.lora_sft_A
+model.layers.6.feed_forward.w3.lora_sft_B
+model.layers.6.feed_forward.w3.lora_dpo_A
+model.layers.6.feed_forward.w3.lora_dpo_B
+model.layers.6.feed_forward.w3.lora_web_A
+model.layers.6.feed_forward.w3.lora_web_B
+model.layers.6.feed_forward.w2
+model.layers.6.feed_forward.w2.lora_dropout
+model.layers.6.feed_forward.w2.Plora_A
+model.layers.6.feed_forward.w2.Plora_B
+model.layers.6.feed_forward.w2.lora_sft_A
+model.layers.6.feed_forward.w2.lora_sft_B
+model.layers.6.feed_forward.w2.lora_dpo_A
+model.layers.6.feed_forward.w2.lora_dpo_B
+model.layers.6.feed_forward.w2.lora_web_A
+model.layers.6.feed_forward.w2.lora_web_B
+model.layers.6.feed_forward.act_fn
+model.layers.6.attention_norm
+model.layers.6.ffn_norm
+model.layers.7
+model.layers.7.attention
+model.layers.7.attention.wqkv
+model.layers.7.attention.wqkv.lora_dropout
+model.layers.7.attention.wqkv.Plora_A
+model.layers.7.attention.wqkv.Plora_B
+model.layers.7.attention.wqkv.lora_sft_A
+model.layers.7.attention.wqkv.lora_sft_B
+model.layers.7.attention.wqkv.lora_dpo_A
+model.layers.7.attention.wqkv.lora_dpo_B
+model.layers.7.attention.wqkv.lora_web_A
+model.layers.7.attention.wqkv.lora_web_B
+model.layers.7.attention.wo
+model.layers.7.attention.wo.lora_dropout
+model.layers.7.attention.wo.Plora_A
+model.layers.7.attention.wo.Plora_B
+model.layers.7.attention.wo.lora_sft_A
+model.layers.7.attention.wo.lora_sft_B
+model.layers.7.attention.wo.lora_dpo_A
+model.layers.7.attention.wo.lora_dpo_B
+model.layers.7.attention.wo.lora_web_A
+model.layers.7.attention.wo.lora_web_B
+model.layers.7.attention.rotary_emb
+model.layers.7.feed_forward
+model.layers.7.feed_forward.w1
+model.layers.7.feed_forward.w1.lora_dropout
+model.layers.7.feed_forward.w1.Plora_A
+model.layers.7.feed_forward.w1.Plora_B
+model.layers.7.feed_forward.w1.lora_sft_A
+model.layers.7.feed_forward.w1.lora_sft_B
+model.layers.7.feed_forward.w1.lora_dpo_A
+model.layers.7.feed_forward.w1.lora_dpo_B
+model.layers.7.feed_forward.w1.lora_web_A
+model.layers.7.feed_forward.w1.lora_web_B
+model.layers.7.feed_forward.w3
+model.layers.7.feed_forward.w3.lora_dropout
+model.layers.7.feed_forward.w3.Plora_A
+model.layers.7.feed_forward.w3.Plora_B
+model.layers.7.feed_forward.w3.lora_sft_A
+model.layers.7.feed_forward.w3.lora_sft_B
+model.layers.7.feed_forward.w3.lora_dpo_A
+model.layers.7.feed_forward.w3.lora_dpo_B
+model.layers.7.feed_forward.w3.lora_web_A
+model.layers.7.feed_forward.w3.lora_web_B
+model.layers.7.feed_forward.w2
+model.layers.7.feed_forward.w2.lora_dropout
+model.layers.7.feed_forward.w2.Plora_A
+model.layers.7.feed_forward.w2.Plora_B
+model.layers.7.feed_forward.w2.lora_sft_A
+model.layers.7.feed_forward.w2.lora_sft_B
+model.layers.7.feed_forward.w2.lora_dpo_A
+model.layers.7.feed_forward.w2.lora_dpo_B
+model.layers.7.feed_forward.w2.lora_web_A
+model.layers.7.feed_forward.w2.lora_web_B
+model.layers.7.feed_forward.act_fn
+model.layers.7.attention_norm
+model.layers.7.ffn_norm
+model.layers.8
+model.layers.8.attention
+model.layers.8.attention.wqkv
+model.layers.8.attention.wqkv.lora_dropout
+model.layers.8.attention.wqkv.Plora_A
+model.layers.8.attention.wqkv.Plora_B
+model.layers.8.attention.wqkv.lora_sft_A
+model.layers.8.attention.wqkv.lora_sft_B
+model.layers.8.attention.wqkv.lora_dpo_A
+model.layers.8.attention.wqkv.lora_dpo_B
+model.layers.8.attention.wqkv.lora_web_A
+model.layers.8.attention.wqkv.lora_web_B
+model.layers.8.attention.wo
+model.layers.8.attention.wo.lora_dropout
+model.layers.8.attention.wo.Plora_A
+model.layers.8.attention.wo.Plora_B
+model.layers.8.attention.wo.lora_sft_A
+model.layers.8.attention.wo.lora_sft_B
+model.layers.8.attention.wo.lora_dpo_A
+model.layers.8.attention.wo.lora_dpo_B
+model.layers.8.attention.wo.lora_web_A
+model.layers.8.attention.wo.lora_web_B
+model.layers.8.attention.rotary_emb
+model.layers.8.feed_forward
+model.layers.8.feed_forward.w1
+model.layers.8.feed_forward.w1.lora_dropout
+model.layers.8.feed_forward.w1.Plora_A
+model.layers.8.feed_forward.w1.Plora_B
+model.layers.8.feed_forward.w1.lora_sft_A
+model.layers.8.feed_forward.w1.lora_sft_B
+model.layers.8.feed_forward.w1.lora_dpo_A
+model.layers.8.feed_forward.w1.lora_dpo_B
+model.layers.8.feed_forward.w1.lora_web_A
+model.layers.8.feed_forward.w1.lora_web_B
+model.layers.8.feed_forward.w3
+model.layers.8.feed_forward.w3.lora_dropout
+model.layers.8.feed_forward.w3.Plora_A
+model.layers.8.feed_forward.w3.Plora_B
+model.layers.8.feed_forward.w3.lora_sft_A
+model.layers.8.feed_forward.w3.lora_sft_B
+model.layers.8.feed_forward.w3.lora_dpo_A
+model.layers.8.feed_forward.w3.lora_dpo_B
+model.layers.8.feed_forward.w3.lora_web_A
+model.layers.8.feed_forward.w3.lora_web_B
+model.layers.8.feed_forward.w2
+model.layers.8.feed_forward.w2.lora_dropout
+model.layers.8.feed_forward.w2.Plora_A
+model.layers.8.feed_forward.w2.Plora_B
+model.layers.8.feed_forward.w2.lora_sft_A
+model.layers.8.feed_forward.w2.lora_sft_B
+model.layers.8.feed_forward.w2.lora_dpo_A
+model.layers.8.feed_forward.w2.lora_dpo_B
+model.layers.8.feed_forward.w2.lora_web_A
+model.layers.8.feed_forward.w2.lora_web_B
+model.layers.8.feed_forward.act_fn
+model.layers.8.attention_norm
+model.layers.8.ffn_norm
+model.layers.9
+model.layers.9.attention
+model.layers.9.attention.wqkv
+model.layers.9.attention.wqkv.lora_dropout
+model.layers.9.attention.wqkv.Plora_A
+model.layers.9.attention.wqkv.Plora_B
+model.layers.9.attention.wqkv.lora_sft_A
+model.layers.9.attention.wqkv.lora_sft_B
+model.layers.9.attention.wqkv.lora_dpo_A
+model.layers.9.attention.wqkv.lora_dpo_B
+model.layers.9.attention.wqkv.lora_web_A
+model.layers.9.attention.wqkv.lora_web_B
+model.layers.9.attention.wo
+model.layers.9.attention.wo.lora_dropout
+model.layers.9.attention.wo.Plora_A
+model.layers.9.attention.wo.Plora_B
+model.layers.9.attention.wo.lora_sft_A
+model.layers.9.attention.wo.lora_sft_B
+model.layers.9.attention.wo.lora_dpo_A
+model.layers.9.attention.wo.lora_dpo_B
+model.layers.9.attention.wo.lora_web_A
+model.layers.9.attention.wo.lora_web_B
+model.layers.9.attention.rotary_emb
+model.layers.9.feed_forward
+model.layers.9.feed_forward.w1
+model.layers.9.feed_forward.w1.lora_dropout
+model.layers.9.feed_forward.w1.Plora_A
+model.layers.9.feed_forward.w1.Plora_B
+model.layers.9.feed_forward.w1.lora_sft_A
+model.layers.9.feed_forward.w1.lora_sft_B
+model.layers.9.feed_forward.w1.lora_dpo_A
+model.layers.9.feed_forward.w1.lora_dpo_B
+model.layers.9.feed_forward.w1.lora_web_A
+model.layers.9.feed_forward.w1.lora_web_B
+model.layers.9.feed_forward.w3
+model.layers.9.feed_forward.w3.lora_dropout
+model.layers.9.feed_forward.w3.Plora_A
+model.layers.9.feed_forward.w3.Plora_B
+model.layers.9.feed_forward.w3.lora_sft_A
+model.layers.9.feed_forward.w3.lora_sft_B
+model.layers.9.feed_forward.w3.lora_dpo_A
+model.layers.9.feed_forward.w3.lora_dpo_B
+model.layers.9.feed_forward.w3.lora_web_A
+model.layers.9.feed_forward.w3.lora_web_B
+model.layers.9.feed_forward.w2
+model.layers.9.feed_forward.w2.lora_dropout
+model.layers.9.feed_forward.w2.Plora_A
+model.layers.9.feed_forward.w2.Plora_B
+model.layers.9.feed_forward.w2.lora_sft_A
+model.layers.9.feed_forward.w2.lora_sft_B
+model.layers.9.feed_forward.w2.lora_dpo_A
+model.layers.9.feed_forward.w2.lora_dpo_B
+model.layers.9.feed_forward.w2.lora_web_A
+model.layers.9.feed_forward.w2.lora_web_B
+model.layers.9.feed_forward.act_fn
+model.layers.9.attention_norm
+model.layers.9.ffn_norm
+model.layers.10
+model.layers.10.attention
+model.layers.10.attention.wqkv
+model.layers.10.attention.wqkv.lora_dropout
+model.layers.10.attention.wqkv.Plora_A
+model.layers.10.attention.wqkv.Plora_B
+model.layers.10.attention.wqkv.lora_sft_A
+model.layers.10.attention.wqkv.lora_sft_B
+model.layers.10.attention.wqkv.lora_dpo_A
+model.layers.10.attention.wqkv.lora_dpo_B
+model.layers.10.attention.wqkv.lora_web_A
+model.layers.10.attention.wqkv.lora_web_B
+model.layers.10.attention.wo
+model.layers.10.attention.wo.lora_dropout
+model.layers.10.attention.wo.Plora_A
+model.layers.10.attention.wo.Plora_B
+model.layers.10.attention.wo.lora_sft_A
+model.layers.10.attention.wo.lora_sft_B
+model.layers.10.attention.wo.lora_dpo_A
+model.layers.10.attention.wo.lora_dpo_B
+model.layers.10.attention.wo.lora_web_A
+model.layers.10.attention.wo.lora_web_B
+model.layers.10.attention.rotary_emb
+model.layers.10.feed_forward
+model.layers.10.feed_forward.w1
+model.layers.10.feed_forward.w1.lora_dropout
+model.layers.10.feed_forward.w1.Plora_A
+model.layers.10.feed_forward.w1.Plora_B
+model.layers.10.feed_forward.w1.lora_sft_A
+model.layers.10.feed_forward.w1.lora_sft_B
+model.layers.10.feed_forward.w1.lora_dpo_A
+model.layers.10.feed_forward.w1.lora_dpo_B
+model.layers.10.feed_forward.w1.lora_web_A
+model.layers.10.feed_forward.w1.lora_web_B
+model.layers.10.feed_forward.w3
+model.layers.10.feed_forward.w3.lora_dropout
+model.layers.10.feed_forward.w3.Plora_A
+model.layers.10.feed_forward.w3.Plora_B
+model.layers.10.feed_forward.w3.lora_sft_A
+model.layers.10.feed_forward.w3.lora_sft_B
+model.layers.10.feed_forward.w3.lora_dpo_A
+model.layers.10.feed_forward.w3.lora_dpo_B
+model.layers.10.feed_forward.w3.lora_web_A
+model.layers.10.feed_forward.w3.lora_web_B
+model.layers.10.feed_forward.w2
+model.layers.10.feed_forward.w2.lora_dropout
+model.layers.10.feed_forward.w2.Plora_A
+model.layers.10.feed_forward.w2.Plora_B
+model.layers.10.feed_forward.w2.lora_sft_A
+model.layers.10.feed_forward.w2.lora_sft_B
+model.layers.10.feed_forward.w2.lora_dpo_A
+model.layers.10.feed_forward.w2.lora_dpo_B
+model.layers.10.feed_forward.w2.lora_web_A
+model.layers.10.feed_forward.w2.lora_web_B
+model.layers.10.feed_forward.act_fn
+model.layers.10.attention_norm
+model.layers.10.ffn_norm
+model.layers.11
+model.layers.11.attention
+model.layers.11.attention.wqkv
+model.layers.11.attention.wqkv.lora_dropout
+model.layers.11.attention.wqkv.Plora_A
+model.layers.11.attention.wqkv.Plora_B
+model.layers.11.attention.wqkv.lora_sft_A
+model.layers.11.attention.wqkv.lora_sft_B
+model.layers.11.attention.wqkv.lora_dpo_A
+model.layers.11.attention.wqkv.lora_dpo_B
+model.layers.11.attention.wqkv.lora_web_A
+model.layers.11.attention.wqkv.lora_web_B
+model.layers.11.attention.wo
+model.layers.11.attention.wo.lora_dropout
+model.layers.11.attention.wo.Plora_A
+model.layers.11.attention.wo.Plora_B
+model.layers.11.attention.wo.lora_sft_A
+model.layers.11.attention.wo.lora_sft_B
+model.layers.11.attention.wo.lora_dpo_A
+model.layers.11.attention.wo.lora_dpo_B
+model.layers.11.attention.wo.lora_web_A
+model.layers.11.attention.wo.lora_web_B
+model.layers.11.attention.rotary_emb
+model.layers.11.feed_forward
+model.layers.11.feed_forward.w1
+model.layers.11.feed_forward.w1.lora_dropout
+model.layers.11.feed_forward.w1.Plora_A
+model.layers.11.feed_forward.w1.Plora_B
+model.layers.11.feed_forward.w1.lora_sft_A
+model.layers.11.feed_forward.w1.lora_sft_B
+model.layers.11.feed_forward.w1.lora_dpo_A
+model.layers.11.feed_forward.w1.lora_dpo_B
+model.layers.11.feed_forward.w1.lora_web_A
+model.layers.11.feed_forward.w1.lora_web_B
+model.layers.11.feed_forward.w3
+model.layers.11.feed_forward.w3.lora_dropout
+model.layers.11.feed_forward.w3.Plora_A
+model.layers.11.feed_forward.w3.Plora_B
+model.layers.11.feed_forward.w3.lora_sft_A
+model.layers.11.feed_forward.w3.lora_sft_B
+model.layers.11.feed_forward.w3.lora_dpo_A
+model.layers.11.feed_forward.w3.lora_dpo_B
+model.layers.11.feed_forward.w3.lora_web_A
+model.layers.11.feed_forward.w3.lora_web_B
+model.layers.11.feed_forward.w2
+model.layers.11.feed_forward.w2.lora_dropout
+model.layers.11.feed_forward.w2.Plora_A
+model.layers.11.feed_forward.w2.Plora_B
+model.layers.11.feed_forward.w2.lora_sft_A
+model.layers.11.feed_forward.w2.lora_sft_B
+model.layers.11.feed_forward.w2.lora_dpo_A
+model.layers.11.feed_forward.w2.lora_dpo_B
+model.layers.11.feed_forward.w2.lora_web_A
+model.layers.11.feed_forward.w2.lora_web_B
+model.layers.11.feed_forward.act_fn
+model.layers.11.attention_norm
+model.layers.11.ffn_norm
+model.layers.12
+model.layers.12.attention
+model.layers.12.attention.wqkv
+model.layers.12.attention.wqkv.lora_dropout
+model.layers.12.attention.wqkv.Plora_A
+model.layers.12.attention.wqkv.Plora_B
+model.layers.12.attention.wqkv.lora_sft_A
+model.layers.12.attention.wqkv.lora_sft_B
+model.layers.12.attention.wqkv.lora_dpo_A
+model.layers.12.attention.wqkv.lora_dpo_B
+model.layers.12.attention.wqkv.lora_web_A
+model.layers.12.attention.wqkv.lora_web_B
+model.layers.12.attention.wo
+model.layers.12.attention.wo.lora_dropout
+model.layers.12.attention.wo.Plora_A
+model.layers.12.attention.wo.Plora_B
+model.layers.12.attention.wo.lora_sft_A
+model.layers.12.attention.wo.lora_sft_B
+model.layers.12.attention.wo.lora_dpo_A
+model.layers.12.attention.wo.lora_dpo_B
+model.layers.12.attention.wo.lora_web_A
+model.layers.12.attention.wo.lora_web_B
+model.layers.12.attention.rotary_emb
+model.layers.12.feed_forward
+model.layers.12.feed_forward.w1
+model.layers.12.feed_forward.w1.lora_dropout
+model.layers.12.feed_forward.w1.Plora_A
+model.layers.12.feed_forward.w1.Plora_B
+model.layers.12.feed_forward.w1.lora_sft_A
+model.layers.12.feed_forward.w1.lora_sft_B
+model.layers.12.feed_forward.w1.lora_dpo_A
+model.layers.12.feed_forward.w1.lora_dpo_B
+model.layers.12.feed_forward.w1.lora_web_A
+model.layers.12.feed_forward.w1.lora_web_B
+model.layers.12.feed_forward.w3
+model.layers.12.feed_forward.w3.lora_dropout
+model.layers.12.feed_forward.w3.Plora_A
+model.layers.12.feed_forward.w3.Plora_B
+model.layers.12.feed_forward.w3.lora_sft_A
+model.layers.12.feed_forward.w3.lora_sft_B
+model.layers.12.feed_forward.w3.lora_dpo_A
+model.layers.12.feed_forward.w3.lora_dpo_B
+model.layers.12.feed_forward.w3.lora_web_A
+model.layers.12.feed_forward.w3.lora_web_B
+model.layers.12.feed_forward.w2
+model.layers.12.feed_forward.w2.lora_dropout
+model.layers.12.feed_forward.w2.Plora_A
+model.layers.12.feed_forward.w2.Plora_B
+model.layers.12.feed_forward.w2.lora_sft_A
+model.layers.12.feed_forward.w2.lora_sft_B
+model.layers.12.feed_forward.w2.lora_dpo_A
+model.layers.12.feed_forward.w2.lora_dpo_B
+model.layers.12.feed_forward.w2.lora_web_A
+model.layers.12.feed_forward.w2.lora_web_B
+model.layers.12.feed_forward.act_fn
+model.layers.12.attention_norm
+model.layers.12.ffn_norm
+model.layers.13
+model.layers.13.attention
+model.layers.13.attention.wqkv
+model.layers.13.attention.wqkv.lora_dropout
+model.layers.13.attention.wqkv.Plora_A
+model.layers.13.attention.wqkv.Plora_B
+model.layers.13.attention.wqkv.lora_sft_A
+model.layers.13.attention.wqkv.lora_sft_B
+model.layers.13.attention.wqkv.lora_dpo_A
+model.layers.13.attention.wqkv.lora_dpo_B
+model.layers.13.attention.wqkv.lora_web_A
+model.layers.13.attention.wqkv.lora_web_B
+model.layers.13.attention.wo
+model.layers.13.attention.wo.lora_dropout
+model.layers.13.attention.wo.Plora_A
+model.layers.13.attention.wo.Plora_B
+model.layers.13.attention.wo.lora_sft_A
+model.layers.13.attention.wo.lora_sft_B
+model.layers.13.attention.wo.lora_dpo_A
+model.layers.13.attention.wo.lora_dpo_B
+model.layers.13.attention.wo.lora_web_A
+model.layers.13.attention.wo.lora_web_B
+model.layers.13.attention.rotary_emb
+model.layers.13.feed_forward
+model.layers.13.feed_forward.w1
+model.layers.13.feed_forward.w1.lora_dropout
+model.layers.13.feed_forward.w1.Plora_A
+model.layers.13.feed_forward.w1.Plora_B
+model.layers.13.feed_forward.w1.lora_sft_A
+model.layers.13.feed_forward.w1.lora_sft_B
+model.layers.13.feed_forward.w1.lora_dpo_A
+model.layers.13.feed_forward.w1.lora_dpo_B
+model.layers.13.feed_forward.w1.lora_web_A
+model.layers.13.feed_forward.w1.lora_web_B
+model.layers.13.feed_forward.w3
+model.layers.13.feed_forward.w3.lora_dropout
+model.layers.13.feed_forward.w3.Plora_A
+model.layers.13.feed_forward.w3.Plora_B
+model.layers.13.feed_forward.w3.lora_sft_A
+model.layers.13.feed_forward.w3.lora_sft_B
+model.layers.13.feed_forward.w3.lora_dpo_A
+model.layers.13.feed_forward.w3.lora_dpo_B
+model.layers.13.feed_forward.w3.lora_web_A
+model.layers.13.feed_forward.w3.lora_web_B
+model.layers.13.feed_forward.w2
+model.layers.13.feed_forward.w2.lora_dropout
+model.layers.13.feed_forward.w2.Plora_A
+model.layers.13.feed_forward.w2.Plora_B
+model.layers.13.feed_forward.w2.lora_sft_A
+model.layers.13.feed_forward.w2.lora_sft_B
+model.layers.13.feed_forward.w2.lora_dpo_A
+model.layers.13.feed_forward.w2.lora_dpo_B
+model.layers.13.feed_forward.w2.lora_web_A
+model.layers.13.feed_forward.w2.lora_web_B
+model.layers.13.feed_forward.act_fn
+model.layers.13.attention_norm
+model.layers.13.ffn_norm
+model.layers.14
+model.layers.14.attention
+model.layers.14.attention.wqkv
+model.layers.14.attention.wqkv.lora_dropout
+model.layers.14.attention.wqkv.Plora_A
+model.layers.14.attention.wqkv.Plora_B
+model.layers.14.attention.wqkv.lora_sft_A
+model.layers.14.attention.wqkv.lora_sft_B
+model.layers.14.attention.wqkv.lora_dpo_A
+model.layers.14.attention.wqkv.lora_dpo_B
+model.layers.14.attention.wqkv.lora_web_A
+model.layers.14.attention.wqkv.lora_web_B
+model.layers.14.attention.wo
+model.layers.14.attention.wo.lora_dropout
+model.layers.14.attention.wo.Plora_A
+model.layers.14.attention.wo.Plora_B
+model.layers.14.attention.wo.lora_sft_A
+model.layers.14.attention.wo.lora_sft_B
+model.layers.14.attention.wo.lora_dpo_A
+model.layers.14.attention.wo.lora_dpo_B
+model.layers.14.attention.wo.lora_web_A
+model.layers.14.attention.wo.lora_web_B
+model.layers.14.attention.rotary_emb
+model.layers.14.feed_forward
+model.layers.14.feed_forward.w1
+model.layers.14.feed_forward.w1.lora_dropout
+model.layers.14.feed_forward.w1.Plora_A
+model.layers.14.feed_forward.w1.Plora_B
+model.layers.14.feed_forward.w1.lora_sft_A
+model.layers.14.feed_forward.w1.lora_sft_B
+model.layers.14.feed_forward.w1.lora_dpo_A
+model.layers.14.feed_forward.w1.lora_dpo_B
+model.layers.14.feed_forward.w1.lora_web_A
+model.layers.14.feed_forward.w1.lora_web_B
+model.layers.14.feed_forward.w3
+model.layers.14.feed_forward.w3.lora_dropout
+model.layers.14.feed_forward.w3.Plora_A
+model.layers.14.feed_forward.w3.Plora_B
+model.layers.14.feed_forward.w3.lora_sft_A
+model.layers.14.feed_forward.w3.lora_sft_B
+model.layers.14.feed_forward.w3.lora_dpo_A
+model.layers.14.feed_forward.w3.lora_dpo_B
+model.layers.14.feed_forward.w3.lora_web_A
+model.layers.14.feed_forward.w3.lora_web_B
+model.layers.14.feed_forward.w2
+model.layers.14.feed_forward.w2.lora_dropout
+model.layers.14.feed_forward.w2.Plora_A
+model.layers.14.feed_forward.w2.Plora_B
+model.layers.14.feed_forward.w2.lora_sft_A
+model.layers.14.feed_forward.w2.lora_sft_B
+model.layers.14.feed_forward.w2.lora_dpo_A
+model.layers.14.feed_forward.w2.lora_dpo_B
+model.layers.14.feed_forward.w2.lora_web_A
+model.layers.14.feed_forward.w2.lora_web_B
+model.layers.14.feed_forward.act_fn
+model.layers.14.attention_norm
+model.layers.14.ffn_norm
+model.layers.15
+model.layers.15.attention
+model.layers.15.attention.wqkv
+model.layers.15.attention.wqkv.lora_dropout
+model.layers.15.attention.wqkv.Plora_A
+model.layers.15.attention.wqkv.Plora_B
+model.layers.15.attention.wqkv.lora_sft_A
+model.layers.15.attention.wqkv.lora_sft_B
+model.layers.15.attention.wqkv.lora_dpo_A
+model.layers.15.attention.wqkv.lora_dpo_B
+model.layers.15.attention.wqkv.lora_web_A
+model.layers.15.attention.wqkv.lora_web_B
+model.layers.15.attention.wo
+model.layers.15.attention.wo.lora_dropout
+model.layers.15.attention.wo.Plora_A
+model.layers.15.attention.wo.Plora_B
+model.layers.15.attention.wo.lora_sft_A
+model.layers.15.attention.wo.lora_sft_B
+model.layers.15.attention.wo.lora_dpo_A
+model.layers.15.attention.wo.lora_dpo_B
+model.layers.15.attention.wo.lora_web_A
+model.layers.15.attention.wo.lora_web_B
+model.layers.15.attention.rotary_emb
+model.layers.15.feed_forward
+model.layers.15.feed_forward.w1
+model.layers.15.feed_forward.w1.lora_dropout
+model.layers.15.feed_forward.w1.Plora_A
+model.layers.15.feed_forward.w1.Plora_B
+model.layers.15.feed_forward.w1.lora_sft_A
+model.layers.15.feed_forward.w1.lora_sft_B
+model.layers.15.feed_forward.w1.lora_dpo_A
+model.layers.15.feed_forward.w1.lora_dpo_B
+model.layers.15.feed_forward.w1.lora_web_A
+model.layers.15.feed_forward.w1.lora_web_B
+model.layers.15.feed_forward.w3
+model.layers.15.feed_forward.w3.lora_dropout
+model.layers.15.feed_forward.w3.Plora_A
+model.layers.15.feed_forward.w3.Plora_B
+model.layers.15.feed_forward.w3.lora_sft_A
+model.layers.15.feed_forward.w3.lora_sft_B
+model.layers.15.feed_forward.w3.lora_dpo_A
+model.layers.15.feed_forward.w3.lora_dpo_B
+model.layers.15.feed_forward.w3.lora_web_A
+model.layers.15.feed_forward.w3.lora_web_B
+model.layers.15.feed_forward.w2
+model.layers.15.feed_forward.w2.lora_dropout
+model.layers.15.feed_forward.w2.Plora_A
+model.layers.15.feed_forward.w2.Plora_B
+model.layers.15.feed_forward.w2.lora_sft_A
+model.layers.15.feed_forward.w2.lora_sft_B
+model.layers.15.feed_forward.w2.lora_dpo_A
+model.layers.15.feed_forward.w2.lora_dpo_B
+model.layers.15.feed_forward.w2.lora_web_A
+model.layers.15.feed_forward.w2.lora_web_B
+model.layers.15.feed_forward.act_fn
+model.layers.15.attention_norm
+model.layers.15.ffn_norm
+model.layers.16
+model.layers.16.attention
+model.layers.16.attention.wqkv
+model.layers.16.attention.wqkv.lora_dropout
+model.layers.16.attention.wqkv.Plora_A
+model.layers.16.attention.wqkv.Plora_B
+model.layers.16.attention.wqkv.lora_sft_A
+model.layers.16.attention.wqkv.lora_sft_B
+model.layers.16.attention.wqkv.lora_dpo_A
+model.layers.16.attention.wqkv.lora_dpo_B
+model.layers.16.attention.wqkv.lora_web_A
+model.layers.16.attention.wqkv.lora_web_B
+model.layers.16.attention.wo
+model.layers.16.attention.wo.lora_dropout
+model.layers.16.attention.wo.Plora_A
+model.layers.16.attention.wo.Plora_B
+model.layers.16.attention.wo.lora_sft_A
+model.layers.16.attention.wo.lora_sft_B
+model.layers.16.attention.wo.lora_dpo_A
+model.layers.16.attention.wo.lora_dpo_B
+model.layers.16.attention.wo.lora_web_A
+model.layers.16.attention.wo.lora_web_B
+model.layers.16.attention.rotary_emb
+model.layers.16.feed_forward
+model.layers.16.feed_forward.w1
+model.layers.16.feed_forward.w1.lora_dropout
+model.layers.16.feed_forward.w1.Plora_A
+model.layers.16.feed_forward.w1.Plora_B
+model.layers.16.feed_forward.w1.lora_sft_A
+model.layers.16.feed_forward.w1.lora_sft_B
+model.layers.16.feed_forward.w1.lora_dpo_A
+model.layers.16.feed_forward.w1.lora_dpo_B
+model.layers.16.feed_forward.w1.lora_web_A
+model.layers.16.feed_forward.w1.lora_web_B
+model.layers.16.feed_forward.w3
+model.layers.16.feed_forward.w3.lora_dropout
+model.layers.16.feed_forward.w3.Plora_A
+model.layers.16.feed_forward.w3.Plora_B
+model.layers.16.feed_forward.w3.lora_sft_A
+model.layers.16.feed_forward.w3.lora_sft_B
+model.layers.16.feed_forward.w3.lora_dpo_A
+model.layers.16.feed_forward.w3.lora_dpo_B
+model.layers.16.feed_forward.w3.lora_web_A
+model.layers.16.feed_forward.w3.lora_web_B
+model.layers.16.feed_forward.w2
+model.layers.16.feed_forward.w2.lora_dropout
+model.layers.16.feed_forward.w2.Plora_A
+model.layers.16.feed_forward.w2.Plora_B
+model.layers.16.feed_forward.w2.lora_sft_A
+model.layers.16.feed_forward.w2.lora_sft_B
+model.layers.16.feed_forward.w2.lora_dpo_A
+model.layers.16.feed_forward.w2.lora_dpo_B
+model.layers.16.feed_forward.w2.lora_web_A
+model.layers.16.feed_forward.w2.lora_web_B
+model.layers.16.feed_forward.act_fn
+model.layers.16.attention_norm
+model.layers.16.ffn_norm
+model.layers.17
+model.layers.17.attention
+model.layers.17.attention.wqkv
+model.layers.17.attention.wqkv.lora_dropout
+model.layers.17.attention.wqkv.Plora_A
+model.layers.17.attention.wqkv.Plora_B
+model.layers.17.attention.wqkv.lora_sft_A
+model.layers.17.attention.wqkv.lora_sft_B
+model.layers.17.attention.wqkv.lora_dpo_A
+model.layers.17.attention.wqkv.lora_dpo_B
+model.layers.17.attention.wqkv.lora_web_A
+model.layers.17.attention.wqkv.lora_web_B
+model.layers.17.attention.wo
+model.layers.17.attention.wo.lora_dropout
+model.layers.17.attention.wo.Plora_A
+model.layers.17.attention.wo.Plora_B
+model.layers.17.attention.wo.lora_sft_A
+model.layers.17.attention.wo.lora_sft_B
+model.layers.17.attention.wo.lora_dpo_A
+model.layers.17.attention.wo.lora_dpo_B
+model.layers.17.attention.wo.lora_web_A
+model.layers.17.attention.wo.lora_web_B
+model.layers.17.attention.rotary_emb
+model.layers.17.feed_forward
+model.layers.17.feed_forward.w1
+model.layers.17.feed_forward.w1.lora_dropout
+model.layers.17.feed_forward.w1.Plora_A
+model.layers.17.feed_forward.w1.Plora_B
+model.layers.17.feed_forward.w1.lora_sft_A
+model.layers.17.feed_forward.w1.lora_sft_B
+model.layers.17.feed_forward.w1.lora_dpo_A
+model.layers.17.feed_forward.w1.lora_dpo_B
+model.layers.17.feed_forward.w1.lora_web_A
+model.layers.17.feed_forward.w1.lora_web_B
+model.layers.17.feed_forward.w3
+model.layers.17.feed_forward.w3.lora_dropout
+model.layers.17.feed_forward.w3.Plora_A
+model.layers.17.feed_forward.w3.Plora_B
+model.layers.17.feed_forward.w3.lora_sft_A
+model.layers.17.feed_forward.w3.lora_sft_B
+model.layers.17.feed_forward.w3.lora_dpo_A
+model.layers.17.feed_forward.w3.lora_dpo_B
+model.layers.17.feed_forward.w3.lora_web_A
+model.layers.17.feed_forward.w3.lora_web_B
+model.layers.17.feed_forward.w2
+model.layers.17.feed_forward.w2.lora_dropout
+model.layers.17.feed_forward.w2.Plora_A
+model.layers.17.feed_forward.w2.Plora_B
+model.layers.17.feed_forward.w2.lora_sft_A
+model.layers.17.feed_forward.w2.lora_sft_B
+model.layers.17.feed_forward.w2.lora_dpo_A
+model.layers.17.feed_forward.w2.lora_dpo_B
+model.layers.17.feed_forward.w2.lora_web_A
+model.layers.17.feed_forward.w2.lora_web_B
+model.layers.17.feed_forward.act_fn
+model.layers.17.attention_norm
+model.layers.17.ffn_norm
+model.layers.18
+model.layers.18.attention
+model.layers.18.attention.wqkv
+model.layers.18.attention.wqkv.lora_dropout
+model.layers.18.attention.wqkv.Plora_A
+model.layers.18.attention.wqkv.Plora_B
+model.layers.18.attention.wqkv.lora_sft_A
+model.layers.18.attention.wqkv.lora_sft_B
+model.layers.18.attention.wqkv.lora_dpo_A
+model.layers.18.attention.wqkv.lora_dpo_B
+model.layers.18.attention.wqkv.lora_web_A
+model.layers.18.attention.wqkv.lora_web_B
+model.layers.18.attention.wo
+model.layers.18.attention.wo.lora_dropout
+model.layers.18.attention.wo.Plora_A
+model.layers.18.attention.wo.Plora_B
+model.layers.18.attention.wo.lora_sft_A
+model.layers.18.attention.wo.lora_sft_B
+model.layers.18.attention.wo.lora_dpo_A
+model.layers.18.attention.wo.lora_dpo_B
+model.layers.18.attention.wo.lora_web_A
+model.layers.18.attention.wo.lora_web_B
+model.layers.18.attention.rotary_emb
+model.layers.18.feed_forward
+model.layers.18.feed_forward.w1
+model.layers.18.feed_forward.w1.lora_dropout
+model.layers.18.feed_forward.w1.Plora_A
+model.layers.18.feed_forward.w1.Plora_B
+model.layers.18.feed_forward.w1.lora_sft_A
+model.layers.18.feed_forward.w1.lora_sft_B
+model.layers.18.feed_forward.w1.lora_dpo_A
+model.layers.18.feed_forward.w1.lora_dpo_B
+model.layers.18.feed_forward.w1.lora_web_A
+model.layers.18.feed_forward.w1.lora_web_B
+model.layers.18.feed_forward.w3
+model.layers.18.feed_forward.w3.lora_dropout
+model.layers.18.feed_forward.w3.Plora_A
+model.layers.18.feed_forward.w3.Plora_B
+model.layers.18.feed_forward.w3.lora_sft_A
+model.layers.18.feed_forward.w3.lora_sft_B
+model.layers.18.feed_forward.w3.lora_dpo_A
+model.layers.18.feed_forward.w3.lora_dpo_B
+model.layers.18.feed_forward.w3.lora_web_A
+model.layers.18.feed_forward.w3.lora_web_B
+model.layers.18.feed_forward.w2
+model.layers.18.feed_forward.w2.lora_dropout
+model.layers.18.feed_forward.w2.Plora_A
+model.layers.18.feed_forward.w2.Plora_B
+model.layers.18.feed_forward.w2.lora_sft_A
+model.layers.18.feed_forward.w2.lora_sft_B
+model.layers.18.feed_forward.w2.lora_dpo_A
+model.layers.18.feed_forward.w2.lora_dpo_B
+model.layers.18.feed_forward.w2.lora_web_A
+model.layers.18.feed_forward.w2.lora_web_B
+model.layers.18.feed_forward.act_fn
+model.layers.18.attention_norm
+model.layers.18.ffn_norm
+model.layers.19
+model.layers.19.attention
+model.layers.19.attention.wqkv
+model.layers.19.attention.wqkv.lora_dropout
+model.layers.19.attention.wqkv.Plora_A
+model.layers.19.attention.wqkv.Plora_B
+model.layers.19.attention.wqkv.lora_sft_A
+model.layers.19.attention.wqkv.lora_sft_B
+model.layers.19.attention.wqkv.lora_dpo_A
+model.layers.19.attention.wqkv.lora_dpo_B
+model.layers.19.attention.wqkv.lora_web_A
+model.layers.19.attention.wqkv.lora_web_B
+model.layers.19.attention.wo
+model.layers.19.attention.wo.lora_dropout
+model.layers.19.attention.wo.Plora_A
+model.layers.19.attention.wo.Plora_B
+model.layers.19.attention.wo.lora_sft_A
+model.layers.19.attention.wo.lora_sft_B
+model.layers.19.attention.wo.lora_dpo_A
+model.layers.19.attention.wo.lora_dpo_B
+model.layers.19.attention.wo.lora_web_A
+model.layers.19.attention.wo.lora_web_B
+model.layers.19.attention.rotary_emb
+model.layers.19.feed_forward
+model.layers.19.feed_forward.w1
+model.layers.19.feed_forward.w1.lora_dropout
+model.layers.19.feed_forward.w1.Plora_A
+model.layers.19.feed_forward.w1.Plora_B
+model.layers.19.feed_forward.w1.lora_sft_A
+model.layers.19.feed_forward.w1.lora_sft_B
+model.layers.19.feed_forward.w1.lora_dpo_A
+model.layers.19.feed_forward.w1.lora_dpo_B
+model.layers.19.feed_forward.w1.lora_web_A
+model.layers.19.feed_forward.w1.lora_web_B
+model.layers.19.feed_forward.w3
+model.layers.19.feed_forward.w3.lora_dropout
+model.layers.19.feed_forward.w3.Plora_A
+model.layers.19.feed_forward.w3.Plora_B
+model.layers.19.feed_forward.w3.lora_sft_A
+model.layers.19.feed_forward.w3.lora_sft_B
+model.layers.19.feed_forward.w3.lora_dpo_A
+model.layers.19.feed_forward.w3.lora_dpo_B
+model.layers.19.feed_forward.w3.lora_web_A
+model.layers.19.feed_forward.w3.lora_web_B
+model.layers.19.feed_forward.w2
+model.layers.19.feed_forward.w2.lora_dropout
+model.layers.19.feed_forward.w2.Plora_A
+model.layers.19.feed_forward.w2.Plora_B
+model.layers.19.feed_forward.w2.lora_sft_A
+model.layers.19.feed_forward.w2.lora_sft_B
+model.layers.19.feed_forward.w2.lora_dpo_A
+model.layers.19.feed_forward.w2.lora_dpo_B
+model.layers.19.feed_forward.w2.lora_web_A
+model.layers.19.feed_forward.w2.lora_web_B
+model.layers.19.feed_forward.act_fn
+model.layers.19.attention_norm
+model.layers.19.ffn_norm
+model.layers.20
+model.layers.20.attention
+model.layers.20.attention.wqkv
+model.layers.20.attention.wqkv.lora_dropout
+model.layers.20.attention.wqkv.Plora_A
+model.layers.20.attention.wqkv.Plora_B
+model.layers.20.attention.wqkv.lora_sft_A
+model.layers.20.attention.wqkv.lora_sft_B
+model.layers.20.attention.wqkv.lora_dpo_A
+model.layers.20.attention.wqkv.lora_dpo_B
+model.layers.20.attention.wqkv.lora_web_A
+model.layers.20.attention.wqkv.lora_web_B
+model.layers.20.attention.wo
+model.layers.20.attention.wo.lora_dropout
+model.layers.20.attention.wo.Plora_A
+model.layers.20.attention.wo.Plora_B
+model.layers.20.attention.wo.lora_sft_A
+model.layers.20.attention.wo.lora_sft_B
+model.layers.20.attention.wo.lora_dpo_A
+model.layers.20.attention.wo.lora_dpo_B
+model.layers.20.attention.wo.lora_web_A
+model.layers.20.attention.wo.lora_web_B
+model.layers.20.attention.rotary_emb
+model.layers.20.feed_forward
+model.layers.20.feed_forward.w1
+model.layers.20.feed_forward.w1.lora_dropout
+model.layers.20.feed_forward.w1.Plora_A
+model.layers.20.feed_forward.w1.Plora_B
+model.layers.20.feed_forward.w1.lora_sft_A
+model.layers.20.feed_forward.w1.lora_sft_B
+model.layers.20.feed_forward.w1.lora_dpo_A
+model.layers.20.feed_forward.w1.lora_dpo_B
+model.layers.20.feed_forward.w1.lora_web_A
+model.layers.20.feed_forward.w1.lora_web_B
+model.layers.20.feed_forward.w3
+model.layers.20.feed_forward.w3.lora_dropout
+model.layers.20.feed_forward.w3.Plora_A
+model.layers.20.feed_forward.w3.Plora_B
+model.layers.20.feed_forward.w3.lora_sft_A
+model.layers.20.feed_forward.w3.lora_sft_B
+model.layers.20.feed_forward.w3.lora_dpo_A
+model.layers.20.feed_forward.w3.lora_dpo_B
+model.layers.20.feed_forward.w3.lora_web_A
+model.layers.20.feed_forward.w3.lora_web_B
+model.layers.20.feed_forward.w2
+model.layers.20.feed_forward.w2.lora_dropout
+model.layers.20.feed_forward.w2.Plora_A
+model.layers.20.feed_forward.w2.Plora_B
+model.layers.20.feed_forward.w2.lora_sft_A
+model.layers.20.feed_forward.w2.lora_sft_B
+model.layers.20.feed_forward.w2.lora_dpo_A
+model.layers.20.feed_forward.w2.lora_dpo_B
+model.layers.20.feed_forward.w2.lora_web_A
+model.layers.20.feed_forward.w2.lora_web_B
+model.layers.20.feed_forward.act_fn
+model.layers.20.attention_norm
+model.layers.20.ffn_norm
+model.layers.21
+model.layers.21.attention
+model.layers.21.attention.wqkv
+model.layers.21.attention.wqkv.lora_dropout
+model.layers.21.attention.wqkv.Plora_A
+model.layers.21.attention.wqkv.Plora_B
+model.layers.21.attention.wqkv.lora_sft_A
+model.layers.21.attention.wqkv.lora_sft_B
+model.layers.21.attention.wqkv.lora_dpo_A
+model.layers.21.attention.wqkv.lora_dpo_B
+model.layers.21.attention.wqkv.lora_web_A
+model.layers.21.attention.wqkv.lora_web_B
+model.layers.21.attention.wo
+model.layers.21.attention.wo.lora_dropout
+model.layers.21.attention.wo.Plora_A
+model.layers.21.attention.wo.Plora_B
+model.layers.21.attention.wo.lora_sft_A
+model.layers.21.attention.wo.lora_sft_B
+model.layers.21.attention.wo.lora_dpo_A
+model.layers.21.attention.wo.lora_dpo_B
+model.layers.21.attention.wo.lora_web_A
+model.layers.21.attention.wo.lora_web_B
+model.layers.21.attention.rotary_emb
+model.layers.21.feed_forward
+model.layers.21.feed_forward.w1
+model.layers.21.feed_forward.w1.lora_dropout
+model.layers.21.feed_forward.w1.Plora_A
+model.layers.21.feed_forward.w1.Plora_B
+model.layers.21.feed_forward.w1.lora_sft_A
+model.layers.21.feed_forward.w1.lora_sft_B
+model.layers.21.feed_forward.w1.lora_dpo_A
+model.layers.21.feed_forward.w1.lora_dpo_B
+model.layers.21.feed_forward.w1.lora_web_A
+model.layers.21.feed_forward.w1.lora_web_B
+model.layers.21.feed_forward.w3
+model.layers.21.feed_forward.w3.lora_dropout
+model.layers.21.feed_forward.w3.Plora_A
+model.layers.21.feed_forward.w3.Plora_B
+model.layers.21.feed_forward.w3.lora_sft_A
+model.layers.21.feed_forward.w3.lora_sft_B
+model.layers.21.feed_forward.w3.lora_dpo_A
+model.layers.21.feed_forward.w3.lora_dpo_B
+model.layers.21.feed_forward.w3.lora_web_A
+model.layers.21.feed_forward.w3.lora_web_B
+model.layers.21.feed_forward.w2
+model.layers.21.feed_forward.w2.lora_dropout
+model.layers.21.feed_forward.w2.Plora_A
+model.layers.21.feed_forward.w2.Plora_B
+model.layers.21.feed_forward.w2.lora_sft_A
+model.layers.21.feed_forward.w2.lora_sft_B
+model.layers.21.feed_forward.w2.lora_dpo_A
+model.layers.21.feed_forward.w2.lora_dpo_B
+model.layers.21.feed_forward.w2.lora_web_A
+model.layers.21.feed_forward.w2.lora_web_B
+model.layers.21.feed_forward.act_fn
+model.layers.21.attention_norm
+model.layers.21.ffn_norm
+model.layers.22
+model.layers.22.attention
+model.layers.22.attention.wqkv
+model.layers.22.attention.wqkv.lora_dropout
+model.layers.22.attention.wqkv.Plora_A
+model.layers.22.attention.wqkv.Plora_B
+model.layers.22.attention.wqkv.lora_sft_A
+model.layers.22.attention.wqkv.lora_sft_B
+model.layers.22.attention.wqkv.lora_dpo_A
+model.layers.22.attention.wqkv.lora_dpo_B
+model.layers.22.attention.wqkv.lora_web_A
+model.layers.22.attention.wqkv.lora_web_B
+model.layers.22.attention.wo
+model.layers.22.attention.wo.lora_dropout
+model.layers.22.attention.wo.Plora_A
+model.layers.22.attention.wo.Plora_B
+model.layers.22.attention.wo.lora_sft_A
+model.layers.22.attention.wo.lora_sft_B
+model.layers.22.attention.wo.lora_dpo_A
+model.layers.22.attention.wo.lora_dpo_B
+model.layers.22.attention.wo.lora_web_A
+model.layers.22.attention.wo.lora_web_B
+model.layers.22.attention.rotary_emb
+model.layers.22.feed_forward
+model.layers.22.feed_forward.w1
+model.layers.22.feed_forward.w1.lora_dropout
+model.layers.22.feed_forward.w1.Plora_A
+model.layers.22.feed_forward.w1.Plora_B
+model.layers.22.feed_forward.w1.lora_sft_A
+model.layers.22.feed_forward.w1.lora_sft_B
+model.layers.22.feed_forward.w1.lora_dpo_A
+model.layers.22.feed_forward.w1.lora_dpo_B
+model.layers.22.feed_forward.w1.lora_web_A
+model.layers.22.feed_forward.w1.lora_web_B
+model.layers.22.feed_forward.w3
+model.layers.22.feed_forward.w3.lora_dropout
+model.layers.22.feed_forward.w3.Plora_A
+model.layers.22.feed_forward.w3.Plora_B
+model.layers.22.feed_forward.w3.lora_sft_A
+model.layers.22.feed_forward.w3.lora_sft_B
+model.layers.22.feed_forward.w3.lora_dpo_A
+model.layers.22.feed_forward.w3.lora_dpo_B
+model.layers.22.feed_forward.w3.lora_web_A
+model.layers.22.feed_forward.w3.lora_web_B
+model.layers.22.feed_forward.w2
+model.layers.22.feed_forward.w2.lora_dropout
+model.layers.22.feed_forward.w2.Plora_A
+model.layers.22.feed_forward.w2.Plora_B
+model.layers.22.feed_forward.w2.lora_sft_A
+model.layers.22.feed_forward.w2.lora_sft_B
+model.layers.22.feed_forward.w2.lora_dpo_A
+model.layers.22.feed_forward.w2.lora_dpo_B
+model.layers.22.feed_forward.w2.lora_web_A
+model.layers.22.feed_forward.w2.lora_web_B
+model.layers.22.feed_forward.act_fn
+model.layers.22.attention_norm
+model.layers.22.ffn_norm
+model.layers.23
+model.layers.23.attention
+model.layers.23.attention.wqkv
+model.layers.23.attention.wqkv.lora_dropout
+model.layers.23.attention.wqkv.Plora_A
+model.layers.23.attention.wqkv.Plora_B
+model.layers.23.attention.wqkv.lora_sft_A
+model.layers.23.attention.wqkv.lora_sft_B
+model.layers.23.attention.wqkv.lora_dpo_A
+model.layers.23.attention.wqkv.lora_dpo_B
+model.layers.23.attention.wqkv.lora_web_A
+model.layers.23.attention.wqkv.lora_web_B
+model.layers.23.attention.wo
+model.layers.23.attention.wo.lora_dropout
+model.layers.23.attention.wo.Plora_A
+model.layers.23.attention.wo.Plora_B
+model.layers.23.attention.wo.lora_sft_A
+model.layers.23.attention.wo.lora_sft_B
+model.layers.23.attention.wo.lora_dpo_A
+model.layers.23.attention.wo.lora_dpo_B
+model.layers.23.attention.wo.lora_web_A
+model.layers.23.attention.wo.lora_web_B
+model.layers.23.attention.rotary_emb
+model.layers.23.feed_forward
+model.layers.23.feed_forward.w1
+model.layers.23.feed_forward.w1.lora_dropout
+model.layers.23.feed_forward.w1.Plora_A
+model.layers.23.feed_forward.w1.Plora_B
+model.layers.23.feed_forward.w1.lora_sft_A
+model.layers.23.feed_forward.w1.lora_sft_B
+model.layers.23.feed_forward.w1.lora_dpo_A
+model.layers.23.feed_forward.w1.lora_dpo_B
+model.layers.23.feed_forward.w1.lora_web_A
+model.layers.23.feed_forward.w1.lora_web_B
+model.layers.23.feed_forward.w3
+model.layers.23.feed_forward.w3.lora_dropout
+model.layers.23.feed_forward.w3.Plora_A
+model.layers.23.feed_forward.w3.Plora_B
+model.layers.23.feed_forward.w3.lora_sft_A
+model.layers.23.feed_forward.w3.lora_sft_B
+model.layers.23.feed_forward.w3.lora_dpo_A
+model.layers.23.feed_forward.w3.lora_dpo_B
+model.layers.23.feed_forward.w3.lora_web_A
+model.layers.23.feed_forward.w3.lora_web_B
+model.layers.23.feed_forward.w2
+model.layers.23.feed_forward.w2.lora_dropout
+model.layers.23.feed_forward.w2.Plora_A
+model.layers.23.feed_forward.w2.Plora_B
+model.layers.23.feed_forward.w2.lora_sft_A
+model.layers.23.feed_forward.w2.lora_sft_B
+model.layers.23.feed_forward.w2.lora_dpo_A
+model.layers.23.feed_forward.w2.lora_dpo_B
+model.layers.23.feed_forward.w2.lora_web_A
+model.layers.23.feed_forward.w2.lora_web_B
+model.layers.23.feed_forward.act_fn
+model.layers.23.attention_norm
+model.layers.23.ffn_norm
+model.layers.24
+model.layers.24.attention
+model.layers.24.attention.wqkv
+model.layers.24.attention.wqkv.lora_dropout
+model.layers.24.attention.wqkv.Plora_A
+model.layers.24.attention.wqkv.Plora_B
+model.layers.24.attention.wqkv.lora_sft_A
+model.layers.24.attention.wqkv.lora_sft_B
+model.layers.24.attention.wqkv.lora_dpo_A
+model.layers.24.attention.wqkv.lora_dpo_B
+model.layers.24.attention.wqkv.lora_web_A
+model.layers.24.attention.wqkv.lora_web_B
+model.layers.24.attention.wo
+model.layers.24.attention.wo.lora_dropout
+model.layers.24.attention.wo.Plora_A
+model.layers.24.attention.wo.Plora_B
+model.layers.24.attention.wo.lora_sft_A
+model.layers.24.attention.wo.lora_sft_B
+model.layers.24.attention.wo.lora_dpo_A
+model.layers.24.attention.wo.lora_dpo_B
+model.layers.24.attention.wo.lora_web_A
+model.layers.24.attention.wo.lora_web_B
+model.layers.24.attention.rotary_emb
+model.layers.24.feed_forward
+model.layers.24.feed_forward.w1
+model.layers.24.feed_forward.w1.lora_dropout
+model.layers.24.feed_forward.w1.Plora_A
+model.layers.24.feed_forward.w1.Plora_B
+model.layers.24.feed_forward.w1.lora_sft_A
+model.layers.24.feed_forward.w1.lora_sft_B
+model.layers.24.feed_forward.w1.lora_dpo_A
+model.layers.24.feed_forward.w1.lora_dpo_B
+model.layers.24.feed_forward.w1.lora_web_A
+model.layers.24.feed_forward.w1.lora_web_B
+model.layers.24.feed_forward.w3
+model.layers.24.feed_forward.w3.lora_dropout
+model.layers.24.feed_forward.w3.Plora_A
+model.layers.24.feed_forward.w3.Plora_B
+model.layers.24.feed_forward.w3.lora_sft_A
+model.layers.24.feed_forward.w3.lora_sft_B
+model.layers.24.feed_forward.w3.lora_dpo_A
+model.layers.24.feed_forward.w3.lora_dpo_B
+model.layers.24.feed_forward.w3.lora_web_A
+model.layers.24.feed_forward.w3.lora_web_B
+model.layers.24.feed_forward.w2
+model.layers.24.feed_forward.w2.lora_dropout
+model.layers.24.feed_forward.w2.Plora_A
+model.layers.24.feed_forward.w2.Plora_B
+model.layers.24.feed_forward.w2.lora_sft_A
+model.layers.24.feed_forward.w2.lora_sft_B
+model.layers.24.feed_forward.w2.lora_dpo_A
+model.layers.24.feed_forward.w2.lora_dpo_B
+model.layers.24.feed_forward.w2.lora_web_A
+model.layers.24.feed_forward.w2.lora_web_B
+model.layers.24.feed_forward.act_fn
+model.layers.24.attention_norm
+model.layers.24.ffn_norm
+model.layers.25
+model.layers.25.attention
+model.layers.25.attention.wqkv
+model.layers.25.attention.wqkv.lora_dropout
+model.layers.25.attention.wqkv.Plora_A
+model.layers.25.attention.wqkv.Plora_B
+model.layers.25.attention.wqkv.lora_sft_A
+model.layers.25.attention.wqkv.lora_sft_B
+model.layers.25.attention.wqkv.lora_dpo_A
+model.layers.25.attention.wqkv.lora_dpo_B
+model.layers.25.attention.wqkv.lora_web_A
+model.layers.25.attention.wqkv.lora_web_B
+model.layers.25.attention.wo
+model.layers.25.attention.wo.lora_dropout
+model.layers.25.attention.wo.Plora_A
+model.layers.25.attention.wo.Plora_B
+model.layers.25.attention.wo.lora_sft_A
+model.layers.25.attention.wo.lora_sft_B
+model.layers.25.attention.wo.lora_dpo_A
+model.layers.25.attention.wo.lora_dpo_B
+model.layers.25.attention.wo.lora_web_A
+model.layers.25.attention.wo.lora_web_B
+model.layers.25.attention.rotary_emb
+model.layers.25.feed_forward
+model.layers.25.feed_forward.w1
+model.layers.25.feed_forward.w1.lora_dropout
+model.layers.25.feed_forward.w1.Plora_A
+model.layers.25.feed_forward.w1.Plora_B
+model.layers.25.feed_forward.w1.lora_sft_A
+model.layers.25.feed_forward.w1.lora_sft_B
+model.layers.25.feed_forward.w1.lora_dpo_A
+model.layers.25.feed_forward.w1.lora_dpo_B
+model.layers.25.feed_forward.w1.lora_web_A
+model.layers.25.feed_forward.w1.lora_web_B
+model.layers.25.feed_forward.w3
+model.layers.25.feed_forward.w3.lora_dropout
+model.layers.25.feed_forward.w3.Plora_A
+model.layers.25.feed_forward.w3.Plora_B
+model.layers.25.feed_forward.w3.lora_sft_A
+model.layers.25.feed_forward.w3.lora_sft_B
+model.layers.25.feed_forward.w3.lora_dpo_A
+model.layers.25.feed_forward.w3.lora_dpo_B
+model.layers.25.feed_forward.w3.lora_web_A
+model.layers.25.feed_forward.w3.lora_web_B
+model.layers.25.feed_forward.w2
+model.layers.25.feed_forward.w2.lora_dropout
+model.layers.25.feed_forward.w2.Plora_A
+model.layers.25.feed_forward.w2.Plora_B
+model.layers.25.feed_forward.w2.lora_sft_A
+model.layers.25.feed_forward.w2.lora_sft_B
+model.layers.25.feed_forward.w2.lora_dpo_A
+model.layers.25.feed_forward.w2.lora_dpo_B
+model.layers.25.feed_forward.w2.lora_web_A
+model.layers.25.feed_forward.w2.lora_web_B
+model.layers.25.feed_forward.act_fn
+model.layers.25.attention_norm
+model.layers.25.ffn_norm
+model.layers.26
+model.layers.26.attention
+model.layers.26.attention.wqkv
+model.layers.26.attention.wqkv.lora_dropout
+model.layers.26.attention.wqkv.Plora_A
+model.layers.26.attention.wqkv.Plora_B
+model.layers.26.attention.wqkv.lora_sft_A
+model.layers.26.attention.wqkv.lora_sft_B
+model.layers.26.attention.wqkv.lora_dpo_A
+model.layers.26.attention.wqkv.lora_dpo_B
+model.layers.26.attention.wqkv.lora_web_A
+model.layers.26.attention.wqkv.lora_web_B
+model.layers.26.attention.wo
+model.layers.26.attention.wo.lora_dropout
+model.layers.26.attention.wo.Plora_A
+model.layers.26.attention.wo.Plora_B
+model.layers.26.attention.wo.lora_sft_A
+model.layers.26.attention.wo.lora_sft_B
+model.layers.26.attention.wo.lora_dpo_A
+model.layers.26.attention.wo.lora_dpo_B
+model.layers.26.attention.wo.lora_web_A
+model.layers.26.attention.wo.lora_web_B
+model.layers.26.attention.rotary_emb
+model.layers.26.feed_forward
+model.layers.26.feed_forward.w1
+model.layers.26.feed_forward.w1.lora_dropout
+model.layers.26.feed_forward.w1.Plora_A
+model.layers.26.feed_forward.w1.Plora_B
+model.layers.26.feed_forward.w1.lora_sft_A
+model.layers.26.feed_forward.w1.lora_sft_B
+model.layers.26.feed_forward.w1.lora_dpo_A
+model.layers.26.feed_forward.w1.lora_dpo_B
+model.layers.26.feed_forward.w1.lora_web_A
+model.layers.26.feed_forward.w1.lora_web_B
+model.layers.26.feed_forward.w3
+model.layers.26.feed_forward.w3.lora_dropout
+model.layers.26.feed_forward.w3.Plora_A
+model.layers.26.feed_forward.w3.Plora_B
+model.layers.26.feed_forward.w3.lora_sft_A
+model.layers.26.feed_forward.w3.lora_sft_B
+model.layers.26.feed_forward.w3.lora_dpo_A
+model.layers.26.feed_forward.w3.lora_dpo_B
+model.layers.26.feed_forward.w3.lora_web_A
+model.layers.26.feed_forward.w3.lora_web_B
+model.layers.26.feed_forward.w2
+model.layers.26.feed_forward.w2.lora_dropout
+model.layers.26.feed_forward.w2.Plora_A
+model.layers.26.feed_forward.w2.Plora_B
+model.layers.26.feed_forward.w2.lora_sft_A
+model.layers.26.feed_forward.w2.lora_sft_B
+model.layers.26.feed_forward.w2.lora_dpo_A
+model.layers.26.feed_forward.w2.lora_dpo_B
+model.layers.26.feed_forward.w2.lora_web_A
+model.layers.26.feed_forward.w2.lora_web_B
+model.layers.26.feed_forward.act_fn
+model.layers.26.attention_norm
+model.layers.26.ffn_norm
+model.layers.27
+model.layers.27.attention
+model.layers.27.attention.wqkv
+model.layers.27.attention.wqkv.lora_dropout
+model.layers.27.attention.wqkv.Plora_A
+model.layers.27.attention.wqkv.Plora_B
+model.layers.27.attention.wqkv.lora_sft_A
+model.layers.27.attention.wqkv.lora_sft_B
+model.layers.27.attention.wqkv.lora_dpo_A
+model.layers.27.attention.wqkv.lora_dpo_B
+model.layers.27.attention.wqkv.lora_web_A
+model.layers.27.attention.wqkv.lora_web_B
+model.layers.27.attention.wo
+model.layers.27.attention.wo.lora_dropout
+model.layers.27.attention.wo.Plora_A
+model.layers.27.attention.wo.Plora_B
+model.layers.27.attention.wo.lora_sft_A
+model.layers.27.attention.wo.lora_sft_B
+model.layers.27.attention.wo.lora_dpo_A
+model.layers.27.attention.wo.lora_dpo_B
+model.layers.27.attention.wo.lora_web_A
+model.layers.27.attention.wo.lora_web_B
+model.layers.27.attention.rotary_emb
+model.layers.27.feed_forward
+model.layers.27.feed_forward.w1
+model.layers.27.feed_forward.w1.lora_dropout
+model.layers.27.feed_forward.w1.Plora_A
+model.layers.27.feed_forward.w1.Plora_B
+model.layers.27.feed_forward.w1.lora_sft_A
+model.layers.27.feed_forward.w1.lora_sft_B
+model.layers.27.feed_forward.w1.lora_dpo_A
+model.layers.27.feed_forward.w1.lora_dpo_B
+model.layers.27.feed_forward.w1.lora_web_A
+model.layers.27.feed_forward.w1.lora_web_B
+model.layers.27.feed_forward.w3
+model.layers.27.feed_forward.w3.lora_dropout
+model.layers.27.feed_forward.w3.Plora_A
+model.layers.27.feed_forward.w3.Plora_B
+model.layers.27.feed_forward.w3.lora_sft_A
+model.layers.27.feed_forward.w3.lora_sft_B
+model.layers.27.feed_forward.w3.lora_dpo_A
+model.layers.27.feed_forward.w3.lora_dpo_B
+model.layers.27.feed_forward.w3.lora_web_A
+model.layers.27.feed_forward.w3.lora_web_B
+model.layers.27.feed_forward.w2
+model.layers.27.feed_forward.w2.lora_dropout
+model.layers.27.feed_forward.w2.Plora_A
+model.layers.27.feed_forward.w2.Plora_B
+model.layers.27.feed_forward.w2.lora_sft_A
+model.layers.27.feed_forward.w2.lora_sft_B
+model.layers.27.feed_forward.w2.lora_dpo_A
+model.layers.27.feed_forward.w2.lora_dpo_B
+model.layers.27.feed_forward.w2.lora_web_A
+model.layers.27.feed_forward.w2.lora_web_B
+model.layers.27.feed_forward.act_fn
+model.layers.27.attention_norm
+model.layers.27.ffn_norm
+model.layers.28
+model.layers.28.attention
+model.layers.28.attention.wqkv
+model.layers.28.attention.wqkv.lora_dropout
+model.layers.28.attention.wqkv.Plora_A
+model.layers.28.attention.wqkv.Plora_B
+model.layers.28.attention.wqkv.lora_sft_A
+model.layers.28.attention.wqkv.lora_sft_B
+model.layers.28.attention.wqkv.lora_dpo_A
+model.layers.28.attention.wqkv.lora_dpo_B
+model.layers.28.attention.wqkv.lora_web_A
+model.layers.28.attention.wqkv.lora_web_B
+model.layers.28.attention.wo
+model.layers.28.attention.wo.lora_dropout
+model.layers.28.attention.wo.Plora_A
+model.layers.28.attention.wo.Plora_B
+model.layers.28.attention.wo.lora_sft_A
+model.layers.28.attention.wo.lora_sft_B
+model.layers.28.attention.wo.lora_dpo_A
+model.layers.28.attention.wo.lora_dpo_B
+model.layers.28.attention.wo.lora_web_A
+model.layers.28.attention.wo.lora_web_B
+model.layers.28.attention.rotary_emb
+model.layers.28.feed_forward
+model.layers.28.feed_forward.w1
+model.layers.28.feed_forward.w1.lora_dropout
+model.layers.28.feed_forward.w1.Plora_A
+model.layers.28.feed_forward.w1.Plora_B
+model.layers.28.feed_forward.w1.lora_sft_A
+model.layers.28.feed_forward.w1.lora_sft_B
+model.layers.28.feed_forward.w1.lora_dpo_A
+model.layers.28.feed_forward.w1.lora_dpo_B
+model.layers.28.feed_forward.w1.lora_web_A
+model.layers.28.feed_forward.w1.lora_web_B
+model.layers.28.feed_forward.w3
+model.layers.28.feed_forward.w3.lora_dropout
+model.layers.28.feed_forward.w3.Plora_A
+model.layers.28.feed_forward.w3.Plora_B
+model.layers.28.feed_forward.w3.lora_sft_A
+model.layers.28.feed_forward.w3.lora_sft_B
+model.layers.28.feed_forward.w3.lora_dpo_A
+model.layers.28.feed_forward.w3.lora_dpo_B
+model.layers.28.feed_forward.w3.lora_web_A
+model.layers.28.feed_forward.w3.lora_web_B
+model.layers.28.feed_forward.w2
+model.layers.28.feed_forward.w2.lora_dropout
+model.layers.28.feed_forward.w2.Plora_A
+model.layers.28.feed_forward.w2.Plora_B
+model.layers.28.feed_forward.w2.lora_sft_A
+model.layers.28.feed_forward.w2.lora_sft_B
+model.layers.28.feed_forward.w2.lora_dpo_A
+model.layers.28.feed_forward.w2.lora_dpo_B
+model.layers.28.feed_forward.w2.lora_web_A
+model.layers.28.feed_forward.w2.lora_web_B
+model.layers.28.feed_forward.act_fn
+model.layers.28.attention_norm
+model.layers.28.ffn_norm
+model.layers.29
+model.layers.29.attention
+model.layers.29.attention.wqkv
+model.layers.29.attention.wqkv.lora_dropout
+model.layers.29.attention.wqkv.Plora_A
+model.layers.29.attention.wqkv.Plora_B
+model.layers.29.attention.wqkv.lora_sft_A
+model.layers.29.attention.wqkv.lora_sft_B
+model.layers.29.attention.wqkv.lora_dpo_A
+model.layers.29.attention.wqkv.lora_dpo_B
+model.layers.29.attention.wqkv.lora_web_A
+model.layers.29.attention.wqkv.lora_web_B
+model.layers.29.attention.wo
+model.layers.29.attention.wo.lora_dropout
+model.layers.29.attention.wo.Plora_A
+model.layers.29.attention.wo.Plora_B
+model.layers.29.attention.wo.lora_sft_A
+model.layers.29.attention.wo.lora_sft_B
+model.layers.29.attention.wo.lora_dpo_A
+model.layers.29.attention.wo.lora_dpo_B
+model.layers.29.attention.wo.lora_web_A
+model.layers.29.attention.wo.lora_web_B
+model.layers.29.attention.rotary_emb
+model.layers.29.feed_forward
+model.layers.29.feed_forward.w1
+model.layers.29.feed_forward.w1.lora_dropout
+model.layers.29.feed_forward.w1.Plora_A
+model.layers.29.feed_forward.w1.Plora_B
+model.layers.29.feed_forward.w1.lora_sft_A
+model.layers.29.feed_forward.w1.lora_sft_B
+model.layers.29.feed_forward.w1.lora_dpo_A
+model.layers.29.feed_forward.w1.lora_dpo_B
+model.layers.29.feed_forward.w1.lora_web_A
+model.layers.29.feed_forward.w1.lora_web_B
+model.layers.29.feed_forward.w3
+model.layers.29.feed_forward.w3.lora_dropout
+model.layers.29.feed_forward.w3.Plora_A
+model.layers.29.feed_forward.w3.Plora_B
+model.layers.29.feed_forward.w3.lora_sft_A
+model.layers.29.feed_forward.w3.lora_sft_B
+model.layers.29.feed_forward.w3.lora_dpo_A
+model.layers.29.feed_forward.w3.lora_dpo_B
+model.layers.29.feed_forward.w3.lora_web_A
+model.layers.29.feed_forward.w3.lora_web_B
+model.layers.29.feed_forward.w2
+model.layers.29.feed_forward.w2.lora_dropout
+model.layers.29.feed_forward.w2.Plora_A
+model.layers.29.feed_forward.w2.Plora_B
+model.layers.29.feed_forward.w2.lora_sft_A
+model.layers.29.feed_forward.w2.lora_sft_B
+model.layers.29.feed_forward.w2.lora_dpo_A
+model.layers.29.feed_forward.w2.lora_dpo_B
+model.layers.29.feed_forward.w2.lora_web_A
+model.layers.29.feed_forward.w2.lora_web_B
+model.layers.29.feed_forward.act_fn
+model.layers.29.attention_norm
+model.layers.29.ffn_norm
+model.layers.30
+model.layers.30.attention
+model.layers.30.attention.wqkv
+model.layers.30.attention.wqkv.lora_dropout
+model.layers.30.attention.wqkv.Plora_A
+model.layers.30.attention.wqkv.Plora_B
+model.layers.30.attention.wqkv.lora_sft_A
+model.layers.30.attention.wqkv.lora_sft_B
+model.layers.30.attention.wqkv.lora_dpo_A
+model.layers.30.attention.wqkv.lora_dpo_B
+model.layers.30.attention.wqkv.lora_web_A
+model.layers.30.attention.wqkv.lora_web_B
+model.layers.30.attention.wo
+model.layers.30.attention.wo.lora_dropout
+model.layers.30.attention.wo.Plora_A
+model.layers.30.attention.wo.Plora_B
+model.layers.30.attention.wo.lora_sft_A
+model.layers.30.attention.wo.lora_sft_B
+model.layers.30.attention.wo.lora_dpo_A
+model.layers.30.attention.wo.lora_dpo_B
+model.layers.30.attention.wo.lora_web_A
+model.layers.30.attention.wo.lora_web_B
+model.layers.30.attention.rotary_emb
+model.layers.30.feed_forward
+model.layers.30.feed_forward.w1
+model.layers.30.feed_forward.w1.lora_dropout
+model.layers.30.feed_forward.w1.Plora_A
+model.layers.30.feed_forward.w1.Plora_B
+model.layers.30.feed_forward.w1.lora_sft_A
+model.layers.30.feed_forward.w1.lora_sft_B
+model.layers.30.feed_forward.w1.lora_dpo_A
+model.layers.30.feed_forward.w1.lora_dpo_B
+model.layers.30.feed_forward.w1.lora_web_A
+model.layers.30.feed_forward.w1.lora_web_B
+model.layers.30.feed_forward.w3
+model.layers.30.feed_forward.w3.lora_dropout
+model.layers.30.feed_forward.w3.Plora_A
+model.layers.30.feed_forward.w3.Plora_B
+model.layers.30.feed_forward.w3.lora_sft_A
+model.layers.30.feed_forward.w3.lora_sft_B
+model.layers.30.feed_forward.w3.lora_dpo_A
+model.layers.30.feed_forward.w3.lora_dpo_B
+model.layers.30.feed_forward.w3.lora_web_A
+model.layers.30.feed_forward.w3.lora_web_B
+model.layers.30.feed_forward.w2
+model.layers.30.feed_forward.w2.lora_dropout
+model.layers.30.feed_forward.w2.Plora_A
+model.layers.30.feed_forward.w2.Plora_B
+model.layers.30.feed_forward.w2.lora_sft_A
+model.layers.30.feed_forward.w2.lora_sft_B
+model.layers.30.feed_forward.w2.lora_dpo_A
+model.layers.30.feed_forward.w2.lora_dpo_B
+model.layers.30.feed_forward.w2.lora_web_A
+model.layers.30.feed_forward.w2.lora_web_B
+model.layers.30.feed_forward.act_fn
+model.layers.30.attention_norm
+model.layers.30.ffn_norm
+model.layers.31
+model.layers.31.attention
+model.layers.31.attention.wqkv
+model.layers.31.attention.wqkv.lora_dropout
+model.layers.31.attention.wqkv.Plora_A
+model.layers.31.attention.wqkv.Plora_B
+model.layers.31.attention.wqkv.lora_sft_A
+model.layers.31.attention.wqkv.lora_sft_B
+model.layers.31.attention.wqkv.lora_dpo_A
+model.layers.31.attention.wqkv.lora_dpo_B
+model.layers.31.attention.wqkv.lora_web_A
+model.layers.31.attention.wqkv.lora_web_B
+model.layers.31.attention.wo
+model.layers.31.attention.wo.lora_dropout
+model.layers.31.attention.wo.Plora_A
+model.layers.31.attention.wo.Plora_B
+model.layers.31.attention.wo.lora_sft_A
+model.layers.31.attention.wo.lora_sft_B
+model.layers.31.attention.wo.lora_dpo_A
+model.layers.31.attention.wo.lora_dpo_B
+model.layers.31.attention.wo.lora_web_A
+model.layers.31.attention.wo.lora_web_B
+model.layers.31.attention.rotary_emb
+model.layers.31.feed_forward
+model.layers.31.feed_forward.w1
+model.layers.31.feed_forward.w1.lora_dropout
+model.layers.31.feed_forward.w1.Plora_A
+model.layers.31.feed_forward.w1.Plora_B
+model.layers.31.feed_forward.w1.lora_sft_A
+model.layers.31.feed_forward.w1.lora_sft_B
+model.layers.31.feed_forward.w1.lora_dpo_A
+model.layers.31.feed_forward.w1.lora_dpo_B
+model.layers.31.feed_forward.w1.lora_web_A
+model.layers.31.feed_forward.w1.lora_web_B
+model.layers.31.feed_forward.w3
+model.layers.31.feed_forward.w3.lora_dropout
+model.layers.31.feed_forward.w3.Plora_A
+model.layers.31.feed_forward.w3.Plora_B
+model.layers.31.feed_forward.w3.lora_sft_A
+model.layers.31.feed_forward.w3.lora_sft_B
+model.layers.31.feed_forward.w3.lora_dpo_A
+model.layers.31.feed_forward.w3.lora_dpo_B
+model.layers.31.feed_forward.w3.lora_web_A
+model.layers.31.feed_forward.w3.lora_web_B
+model.layers.31.feed_forward.w2
+model.layers.31.feed_forward.w2.lora_dropout
+model.layers.31.feed_forward.w2.Plora_A
+model.layers.31.feed_forward.w2.Plora_B
+model.layers.31.feed_forward.w2.lora_sft_A
+model.layers.31.feed_forward.w2.lora_sft_B
+model.layers.31.feed_forward.w2.lora_dpo_A
+model.layers.31.feed_forward.w2.lora_dpo_B
+model.layers.31.feed_forward.w2.lora_web_A
+model.layers.31.feed_forward.w2.lora_web_B
+model.layers.31.feed_forward.act_fn
+model.layers.31.attention_norm
+model.layers.31.ffn_norm
+model.norm
+output
+vit
+vit.vision_tower
+vit.vision_tower.vision_model
+vit.vision_tower.vision_model.embeddings
+vit.vision_tower.vision_model.embeddings.patch_embedding
+vit.vision_tower.vision_model.embeddings.position_embedding
+vit.vision_tower.vision_model.pre_layrnorm
+vit.vision_tower.vision_model.encoder
+vit.vision_tower.vision_model.encoder.layers
+vit.vision_tower.vision_model.encoder.layers.0
+vit.vision_tower.vision_model.encoder.layers.0.self_attn
+vit.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.0.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.0.mlp
+vit.vision_tower.vision_model.encoder.layers.0.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.0.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.0.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.0.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.1
+vit.vision_tower.vision_model.encoder.layers.1.self_attn
+vit.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.1.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.1.mlp
+vit.vision_tower.vision_model.encoder.layers.1.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.1.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.1.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.1.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.2
+vit.vision_tower.vision_model.encoder.layers.2.self_attn
+vit.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.2.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.2.mlp
+vit.vision_tower.vision_model.encoder.layers.2.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.2.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.2.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.2.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.3
+vit.vision_tower.vision_model.encoder.layers.3.self_attn
+vit.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.3.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.3.mlp
+vit.vision_tower.vision_model.encoder.layers.3.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.3.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.3.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.3.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.4
+vit.vision_tower.vision_model.encoder.layers.4.self_attn
+vit.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.4.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.4.mlp
+vit.vision_tower.vision_model.encoder.layers.4.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.4.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.4.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.4.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.5
+vit.vision_tower.vision_model.encoder.layers.5.self_attn
+vit.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.5.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.5.mlp
+vit.vision_tower.vision_model.encoder.layers.5.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.5.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.5.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.5.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.6
+vit.vision_tower.vision_model.encoder.layers.6.self_attn
+vit.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.6.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.6.mlp
+vit.vision_tower.vision_model.encoder.layers.6.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.6.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.6.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.6.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.7
+vit.vision_tower.vision_model.encoder.layers.7.self_attn
+vit.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.7.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.7.mlp
+vit.vision_tower.vision_model.encoder.layers.7.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.7.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.7.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.7.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.8
+vit.vision_tower.vision_model.encoder.layers.8.self_attn
+vit.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.8.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.8.mlp
+vit.vision_tower.vision_model.encoder.layers.8.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.8.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.8.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.8.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.9
+vit.vision_tower.vision_model.encoder.layers.9.self_attn
+vit.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.9.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.9.mlp
+vit.vision_tower.vision_model.encoder.layers.9.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.9.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.9.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.9.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.10
+vit.vision_tower.vision_model.encoder.layers.10.self_attn
+vit.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.10.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.10.mlp
+vit.vision_tower.vision_model.encoder.layers.10.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.10.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.10.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.10.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.11
+vit.vision_tower.vision_model.encoder.layers.11.self_attn
+vit.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.11.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.11.mlp
+vit.vision_tower.vision_model.encoder.layers.11.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.11.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.11.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.11.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.12
+vit.vision_tower.vision_model.encoder.layers.12.self_attn
+vit.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.12.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.12.mlp
+vit.vision_tower.vision_model.encoder.layers.12.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.12.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.12.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.12.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.13
+vit.vision_tower.vision_model.encoder.layers.13.self_attn
+vit.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.13.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.13.mlp
+vit.vision_tower.vision_model.encoder.layers.13.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.13.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.13.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.13.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.14
+vit.vision_tower.vision_model.encoder.layers.14.self_attn
+vit.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.14.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.14.mlp
+vit.vision_tower.vision_model.encoder.layers.14.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.14.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.14.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.14.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.15
+vit.vision_tower.vision_model.encoder.layers.15.self_attn
+vit.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.15.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.15.mlp
+vit.vision_tower.vision_model.encoder.layers.15.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.15.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.15.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.15.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.16
+vit.vision_tower.vision_model.encoder.layers.16.self_attn
+vit.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.16.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.16.mlp
+vit.vision_tower.vision_model.encoder.layers.16.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.16.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.16.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.16.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.17
+vit.vision_tower.vision_model.encoder.layers.17.self_attn
+vit.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.17.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.17.mlp
+vit.vision_tower.vision_model.encoder.layers.17.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.17.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.17.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.17.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.18
+vit.vision_tower.vision_model.encoder.layers.18.self_attn
+vit.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.18.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.18.mlp
+vit.vision_tower.vision_model.encoder.layers.18.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.18.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.18.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.18.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.19
+vit.vision_tower.vision_model.encoder.layers.19.self_attn
+vit.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.19.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.19.mlp
+vit.vision_tower.vision_model.encoder.layers.19.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.19.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.19.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.19.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.20
+vit.vision_tower.vision_model.encoder.layers.20.self_attn
+vit.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.20.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.20.mlp
+vit.vision_tower.vision_model.encoder.layers.20.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.20.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.20.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.20.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.21
+vit.vision_tower.vision_model.encoder.layers.21.self_attn
+vit.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.21.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.21.mlp
+vit.vision_tower.vision_model.encoder.layers.21.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.21.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.21.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.21.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.22
+vit.vision_tower.vision_model.encoder.layers.22.self_attn
+vit.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.22.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.22.mlp
+vit.vision_tower.vision_model.encoder.layers.22.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.22.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.22.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.22.layer_norm2
+vit.vision_tower.vision_model.encoder.layers.23
+vit.vision_tower.vision_model.encoder.layers.23.self_attn
+vit.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj
+vit.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj
+vit.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj
+vit.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj
+vit.vision_tower.vision_model.encoder.layers.23.layer_norm1
+vit.vision_tower.vision_model.encoder.layers.23.mlp
+vit.vision_tower.vision_model.encoder.layers.23.mlp.activation_fn
+vit.vision_tower.vision_model.encoder.layers.23.mlp.fc1
+vit.vision_tower.vision_model.encoder.layers.23.mlp.fc2
+vit.vision_tower.vision_model.encoder.layers.23.layer_norm2
+vit.vision_tower.vision_model.post_layernorm
+vision_proj
+vision_proj.0
+vision_proj.1
+vision_proj.2
diff --git a/logs/internvl/InternVL2_5-8B.txt b/logs/internvl/InternVL2_5-8B.txt
new file mode 100644
index 0000000000000000000000000000000000000000..16668f9fec55179cb4e6df3aa121f90a835a4d89
--- /dev/null
+++ b/logs/internvl/InternVL2_5-8B.txt
@@ -0,0 +1,737 @@
+
+vision_model
+vision_model.embeddings
+vision_model.embeddings.patch_embedding
+vision_model.encoder
+vision_model.encoder.layers
+vision_model.encoder.layers.0
+vision_model.encoder.layers.0.attn
+vision_model.encoder.layers.0.attn.qkv
+vision_model.encoder.layers.0.attn.attn_drop
+vision_model.encoder.layers.0.attn.proj_drop
+vision_model.encoder.layers.0.attn.proj
+vision_model.encoder.layers.0.mlp
+vision_model.encoder.layers.0.mlp.act
+vision_model.encoder.layers.0.mlp.fc1
+vision_model.encoder.layers.0.mlp.fc2
+vision_model.encoder.layers.0.norm1
+vision_model.encoder.layers.0.norm2
+vision_model.encoder.layers.0.drop_path1
+vision_model.encoder.layers.0.drop_path2
+vision_model.encoder.layers.1
+vision_model.encoder.layers.1.attn
+vision_model.encoder.layers.1.attn.qkv
+vision_model.encoder.layers.1.attn.attn_drop
+vision_model.encoder.layers.1.attn.proj_drop
+vision_model.encoder.layers.1.attn.proj
+vision_model.encoder.layers.1.mlp
+vision_model.encoder.layers.1.mlp.act
+vision_model.encoder.layers.1.mlp.fc1
+vision_model.encoder.layers.1.mlp.fc2
+vision_model.encoder.layers.1.norm1
+vision_model.encoder.layers.1.norm2
+vision_model.encoder.layers.1.drop_path1
+vision_model.encoder.layers.1.drop_path2
+vision_model.encoder.layers.2
+vision_model.encoder.layers.2.attn
+vision_model.encoder.layers.2.attn.qkv
+vision_model.encoder.layers.2.attn.attn_drop
+vision_model.encoder.layers.2.attn.proj_drop
+vision_model.encoder.layers.2.attn.proj
+vision_model.encoder.layers.2.mlp
+vision_model.encoder.layers.2.mlp.act
+vision_model.encoder.layers.2.mlp.fc1
+vision_model.encoder.layers.2.mlp.fc2
+vision_model.encoder.layers.2.norm1
+vision_model.encoder.layers.2.norm2
+vision_model.encoder.layers.2.drop_path1
+vision_model.encoder.layers.2.drop_path2
+vision_model.encoder.layers.3
+vision_model.encoder.layers.3.attn
+vision_model.encoder.layers.3.attn.qkv
+vision_model.encoder.layers.3.attn.attn_drop
+vision_model.encoder.layers.3.attn.proj_drop
+vision_model.encoder.layers.3.attn.proj
+vision_model.encoder.layers.3.mlp
+vision_model.encoder.layers.3.mlp.act
+vision_model.encoder.layers.3.mlp.fc1
+vision_model.encoder.layers.3.mlp.fc2
+vision_model.encoder.layers.3.norm1
+vision_model.encoder.layers.3.norm2
+vision_model.encoder.layers.3.drop_path1
+vision_model.encoder.layers.3.drop_path2
+vision_model.encoder.layers.4
+vision_model.encoder.layers.4.attn
+vision_model.encoder.layers.4.attn.qkv
+vision_model.encoder.layers.4.attn.attn_drop
+vision_model.encoder.layers.4.attn.proj_drop
+vision_model.encoder.layers.4.attn.proj
+vision_model.encoder.layers.4.mlp
+vision_model.encoder.layers.4.mlp.act
+vision_model.encoder.layers.4.mlp.fc1
+vision_model.encoder.layers.4.mlp.fc2
+vision_model.encoder.layers.4.norm1
+vision_model.encoder.layers.4.norm2
+vision_model.encoder.layers.4.drop_path1
+vision_model.encoder.layers.4.drop_path2
+vision_model.encoder.layers.5
+vision_model.encoder.layers.5.attn
+vision_model.encoder.layers.5.attn.qkv
+vision_model.encoder.layers.5.attn.attn_drop
+vision_model.encoder.layers.5.attn.proj_drop
+vision_model.encoder.layers.5.attn.proj
+vision_model.encoder.layers.5.mlp
+vision_model.encoder.layers.5.mlp.act
+vision_model.encoder.layers.5.mlp.fc1
+vision_model.encoder.layers.5.mlp.fc2
+vision_model.encoder.layers.5.norm1
+vision_model.encoder.layers.5.norm2
+vision_model.encoder.layers.5.drop_path1
+vision_model.encoder.layers.5.drop_path2
+vision_model.encoder.layers.6
+vision_model.encoder.layers.6.attn
+vision_model.encoder.layers.6.attn.qkv
+vision_model.encoder.layers.6.attn.attn_drop
+vision_model.encoder.layers.6.attn.proj_drop
+vision_model.encoder.layers.6.attn.proj
+vision_model.encoder.layers.6.mlp
+vision_model.encoder.layers.6.mlp.act
+vision_model.encoder.layers.6.mlp.fc1
+vision_model.encoder.layers.6.mlp.fc2
+vision_model.encoder.layers.6.norm1
+vision_model.encoder.layers.6.norm2
+vision_model.encoder.layers.6.drop_path1
+vision_model.encoder.layers.6.drop_path2
+vision_model.encoder.layers.7
+vision_model.encoder.layers.7.attn
+vision_model.encoder.layers.7.attn.qkv
+vision_model.encoder.layers.7.attn.attn_drop
+vision_model.encoder.layers.7.attn.proj_drop
+vision_model.encoder.layers.7.attn.proj
+vision_model.encoder.layers.7.mlp
+vision_model.encoder.layers.7.mlp.act
+vision_model.encoder.layers.7.mlp.fc1
+vision_model.encoder.layers.7.mlp.fc2
+vision_model.encoder.layers.7.norm1
+vision_model.encoder.layers.7.norm2
+vision_model.encoder.layers.7.drop_path1
+vision_model.encoder.layers.7.drop_path2
+vision_model.encoder.layers.8
+vision_model.encoder.layers.8.attn
+vision_model.encoder.layers.8.attn.qkv
+vision_model.encoder.layers.8.attn.attn_drop
+vision_model.encoder.layers.8.attn.proj_drop
+vision_model.encoder.layers.8.attn.proj
+vision_model.encoder.layers.8.mlp
+vision_model.encoder.layers.8.mlp.act
+vision_model.encoder.layers.8.mlp.fc1
+vision_model.encoder.layers.8.mlp.fc2
+vision_model.encoder.layers.8.norm1
+vision_model.encoder.layers.8.norm2
+vision_model.encoder.layers.8.drop_path1
+vision_model.encoder.layers.8.drop_path2
+vision_model.encoder.layers.9
+vision_model.encoder.layers.9.attn
+vision_model.encoder.layers.9.attn.qkv
+vision_model.encoder.layers.9.attn.attn_drop
+vision_model.encoder.layers.9.attn.proj_drop
+vision_model.encoder.layers.9.attn.proj
+vision_model.encoder.layers.9.mlp
+vision_model.encoder.layers.9.mlp.act
+vision_model.encoder.layers.9.mlp.fc1
+vision_model.encoder.layers.9.mlp.fc2
+vision_model.encoder.layers.9.norm1
+vision_model.encoder.layers.9.norm2
+vision_model.encoder.layers.9.drop_path1
+vision_model.encoder.layers.9.drop_path2
+vision_model.encoder.layers.10
+vision_model.encoder.layers.10.attn
+vision_model.encoder.layers.10.attn.qkv
+vision_model.encoder.layers.10.attn.attn_drop
+vision_model.encoder.layers.10.attn.proj_drop
+vision_model.encoder.layers.10.attn.proj
+vision_model.encoder.layers.10.mlp
+vision_model.encoder.layers.10.mlp.act
+vision_model.encoder.layers.10.mlp.fc1
+vision_model.encoder.layers.10.mlp.fc2
+vision_model.encoder.layers.10.norm1
+vision_model.encoder.layers.10.norm2
+vision_model.encoder.layers.10.drop_path1
+vision_model.encoder.layers.10.drop_path2
+vision_model.encoder.layers.11
+vision_model.encoder.layers.11.attn
+vision_model.encoder.layers.11.attn.qkv
+vision_model.encoder.layers.11.attn.attn_drop
+vision_model.encoder.layers.11.attn.proj_drop
+vision_model.encoder.layers.11.attn.proj
+vision_model.encoder.layers.11.mlp
+vision_model.encoder.layers.11.mlp.act
+vision_model.encoder.layers.11.mlp.fc1
+vision_model.encoder.layers.11.mlp.fc2
+vision_model.encoder.layers.11.norm1
+vision_model.encoder.layers.11.norm2
+vision_model.encoder.layers.11.drop_path1
+vision_model.encoder.layers.11.drop_path2
+vision_model.encoder.layers.12
+vision_model.encoder.layers.12.attn
+vision_model.encoder.layers.12.attn.qkv
+vision_model.encoder.layers.12.attn.attn_drop
+vision_model.encoder.layers.12.attn.proj_drop
+vision_model.encoder.layers.12.attn.proj
+vision_model.encoder.layers.12.mlp
+vision_model.encoder.layers.12.mlp.act
+vision_model.encoder.layers.12.mlp.fc1
+vision_model.encoder.layers.12.mlp.fc2
+vision_model.encoder.layers.12.norm1
+vision_model.encoder.layers.12.norm2
+vision_model.encoder.layers.12.drop_path1
+vision_model.encoder.layers.12.drop_path2
+vision_model.encoder.layers.13
+vision_model.encoder.layers.13.attn
+vision_model.encoder.layers.13.attn.qkv
+vision_model.encoder.layers.13.attn.attn_drop
+vision_model.encoder.layers.13.attn.proj_drop
+vision_model.encoder.layers.13.attn.proj
+vision_model.encoder.layers.13.mlp
+vision_model.encoder.layers.13.mlp.act
+vision_model.encoder.layers.13.mlp.fc1
+vision_model.encoder.layers.13.mlp.fc2
+vision_model.encoder.layers.13.norm1
+vision_model.encoder.layers.13.norm2
+vision_model.encoder.layers.13.drop_path1
+vision_model.encoder.layers.13.drop_path2
+vision_model.encoder.layers.14
+vision_model.encoder.layers.14.attn
+vision_model.encoder.layers.14.attn.qkv
+vision_model.encoder.layers.14.attn.attn_drop
+vision_model.encoder.layers.14.attn.proj_drop
+vision_model.encoder.layers.14.attn.proj
+vision_model.encoder.layers.14.mlp
+vision_model.encoder.layers.14.mlp.act
+vision_model.encoder.layers.14.mlp.fc1
+vision_model.encoder.layers.14.mlp.fc2
+vision_model.encoder.layers.14.norm1
+vision_model.encoder.layers.14.norm2
+vision_model.encoder.layers.14.drop_path1
+vision_model.encoder.layers.14.drop_path2
+vision_model.encoder.layers.15
+vision_model.encoder.layers.15.attn
+vision_model.encoder.layers.15.attn.qkv
+vision_model.encoder.layers.15.attn.attn_drop
+vision_model.encoder.layers.15.attn.proj_drop
+vision_model.encoder.layers.15.attn.proj
+vision_model.encoder.layers.15.mlp
+vision_model.encoder.layers.15.mlp.act
+vision_model.encoder.layers.15.mlp.fc1
+vision_model.encoder.layers.15.mlp.fc2
+vision_model.encoder.layers.15.norm1
+vision_model.encoder.layers.15.norm2
+vision_model.encoder.layers.15.drop_path1
+vision_model.encoder.layers.15.drop_path2
+vision_model.encoder.layers.16
+vision_model.encoder.layers.16.attn
+vision_model.encoder.layers.16.attn.qkv
+vision_model.encoder.layers.16.attn.attn_drop
+vision_model.encoder.layers.16.attn.proj_drop
+vision_model.encoder.layers.16.attn.proj
+vision_model.encoder.layers.16.mlp
+vision_model.encoder.layers.16.mlp.act
+vision_model.encoder.layers.16.mlp.fc1
+vision_model.encoder.layers.16.mlp.fc2
+vision_model.encoder.layers.16.norm1
+vision_model.encoder.layers.16.norm2
+vision_model.encoder.layers.16.drop_path1
+vision_model.encoder.layers.16.drop_path2
+vision_model.encoder.layers.17
+vision_model.encoder.layers.17.attn
+vision_model.encoder.layers.17.attn.qkv
+vision_model.encoder.layers.17.attn.attn_drop
+vision_model.encoder.layers.17.attn.proj_drop
+vision_model.encoder.layers.17.attn.proj
+vision_model.encoder.layers.17.mlp
+vision_model.encoder.layers.17.mlp.act
+vision_model.encoder.layers.17.mlp.fc1
+vision_model.encoder.layers.17.mlp.fc2
+vision_model.encoder.layers.17.norm1
+vision_model.encoder.layers.17.norm2
+vision_model.encoder.layers.17.drop_path1
+vision_model.encoder.layers.17.drop_path2
+vision_model.encoder.layers.18
+vision_model.encoder.layers.18.attn
+vision_model.encoder.layers.18.attn.qkv
+vision_model.encoder.layers.18.attn.attn_drop
+vision_model.encoder.layers.18.attn.proj_drop
+vision_model.encoder.layers.18.attn.proj
+vision_model.encoder.layers.18.mlp
+vision_model.encoder.layers.18.mlp.act
+vision_model.encoder.layers.18.mlp.fc1
+vision_model.encoder.layers.18.mlp.fc2
+vision_model.encoder.layers.18.norm1
+vision_model.encoder.layers.18.norm2
+vision_model.encoder.layers.18.drop_path1
+vision_model.encoder.layers.18.drop_path2
+vision_model.encoder.layers.19
+vision_model.encoder.layers.19.attn
+vision_model.encoder.layers.19.attn.qkv
+vision_model.encoder.layers.19.attn.attn_drop
+vision_model.encoder.layers.19.attn.proj_drop
+vision_model.encoder.layers.19.attn.proj
+vision_model.encoder.layers.19.mlp
+vision_model.encoder.layers.19.mlp.act
+vision_model.encoder.layers.19.mlp.fc1
+vision_model.encoder.layers.19.mlp.fc2
+vision_model.encoder.layers.19.norm1
+vision_model.encoder.layers.19.norm2
+vision_model.encoder.layers.19.drop_path1
+vision_model.encoder.layers.19.drop_path2
+vision_model.encoder.layers.20
+vision_model.encoder.layers.20.attn
+vision_model.encoder.layers.20.attn.qkv
+vision_model.encoder.layers.20.attn.attn_drop
+vision_model.encoder.layers.20.attn.proj_drop
+vision_model.encoder.layers.20.attn.proj
+vision_model.encoder.layers.20.mlp
+vision_model.encoder.layers.20.mlp.act
+vision_model.encoder.layers.20.mlp.fc1
+vision_model.encoder.layers.20.mlp.fc2
+vision_model.encoder.layers.20.norm1
+vision_model.encoder.layers.20.norm2
+vision_model.encoder.layers.20.drop_path1
+vision_model.encoder.layers.20.drop_path2
+vision_model.encoder.layers.21
+vision_model.encoder.layers.21.attn
+vision_model.encoder.layers.21.attn.qkv
+vision_model.encoder.layers.21.attn.attn_drop
+vision_model.encoder.layers.21.attn.proj_drop
+vision_model.encoder.layers.21.attn.proj
+vision_model.encoder.layers.21.mlp
+vision_model.encoder.layers.21.mlp.act
+vision_model.encoder.layers.21.mlp.fc1
+vision_model.encoder.layers.21.mlp.fc2
+vision_model.encoder.layers.21.norm1
+vision_model.encoder.layers.21.norm2
+vision_model.encoder.layers.21.drop_path1
+vision_model.encoder.layers.21.drop_path2
+vision_model.encoder.layers.22
+vision_model.encoder.layers.22.attn
+vision_model.encoder.layers.22.attn.qkv
+vision_model.encoder.layers.22.attn.attn_drop
+vision_model.encoder.layers.22.attn.proj_drop
+vision_model.encoder.layers.22.attn.proj
+vision_model.encoder.layers.22.mlp
+vision_model.encoder.layers.22.mlp.act
+vision_model.encoder.layers.22.mlp.fc1
+vision_model.encoder.layers.22.mlp.fc2
+vision_model.encoder.layers.22.norm1
+vision_model.encoder.layers.22.norm2
+vision_model.encoder.layers.22.drop_path1
+vision_model.encoder.layers.22.drop_path2
+vision_model.encoder.layers.23
+vision_model.encoder.layers.23.attn
+vision_model.encoder.layers.23.attn.qkv
+vision_model.encoder.layers.23.attn.attn_drop
+vision_model.encoder.layers.23.attn.proj_drop
+vision_model.encoder.layers.23.attn.proj
+vision_model.encoder.layers.23.mlp
+vision_model.encoder.layers.23.mlp.act
+vision_model.encoder.layers.23.mlp.fc1
+vision_model.encoder.layers.23.mlp.fc2
+vision_model.encoder.layers.23.norm1
+vision_model.encoder.layers.23.norm2
+vision_model.encoder.layers.23.drop_path1
+vision_model.encoder.layers.23.drop_path2
+language_model
+language_model.model
+language_model.model.tok_embeddings
+language_model.model.layers
+language_model.model.layers.0
+language_model.model.layers.0.attention
+language_model.model.layers.0.attention.wqkv
+language_model.model.layers.0.attention.wo
+language_model.model.layers.0.attention.rotary_emb
+language_model.model.layers.0.feed_forward
+language_model.model.layers.0.feed_forward.w1
+language_model.model.layers.0.feed_forward.w3
+language_model.model.layers.0.feed_forward.w2
+language_model.model.layers.0.feed_forward.act_fn
+language_model.model.layers.0.attention_norm
+language_model.model.layers.0.ffn_norm
+language_model.model.layers.1
+language_model.model.layers.1.attention
+language_model.model.layers.1.attention.wqkv
+language_model.model.layers.1.attention.wo
+language_model.model.layers.1.attention.rotary_emb
+language_model.model.layers.1.feed_forward
+language_model.model.layers.1.feed_forward.w1
+language_model.model.layers.1.feed_forward.w3
+language_model.model.layers.1.feed_forward.w2
+language_model.model.layers.1.feed_forward.act_fn
+language_model.model.layers.1.attention_norm
+language_model.model.layers.1.ffn_norm
+language_model.model.layers.2
+language_model.model.layers.2.attention
+language_model.model.layers.2.attention.wqkv
+language_model.model.layers.2.attention.wo
+language_model.model.layers.2.attention.rotary_emb
+language_model.model.layers.2.feed_forward
+language_model.model.layers.2.feed_forward.w1
+language_model.model.layers.2.feed_forward.w3
+language_model.model.layers.2.feed_forward.w2
+language_model.model.layers.2.feed_forward.act_fn
+language_model.model.layers.2.attention_norm
+language_model.model.layers.2.ffn_norm
+language_model.model.layers.3
+language_model.model.layers.3.attention
+language_model.model.layers.3.attention.wqkv
+language_model.model.layers.3.attention.wo
+language_model.model.layers.3.attention.rotary_emb
+language_model.model.layers.3.feed_forward
+language_model.model.layers.3.feed_forward.w1
+language_model.model.layers.3.feed_forward.w3
+language_model.model.layers.3.feed_forward.w2
+language_model.model.layers.3.feed_forward.act_fn
+language_model.model.layers.3.attention_norm
+language_model.model.layers.3.ffn_norm
+language_model.model.layers.4
+language_model.model.layers.4.attention
+language_model.model.layers.4.attention.wqkv
+language_model.model.layers.4.attention.wo
+language_model.model.layers.4.attention.rotary_emb
+language_model.model.layers.4.feed_forward
+language_model.model.layers.4.feed_forward.w1
+language_model.model.layers.4.feed_forward.w3
+language_model.model.layers.4.feed_forward.w2
+language_model.model.layers.4.feed_forward.act_fn
+language_model.model.layers.4.attention_norm
+language_model.model.layers.4.ffn_norm
+language_model.model.layers.5
+language_model.model.layers.5.attention
+language_model.model.layers.5.attention.wqkv
+language_model.model.layers.5.attention.wo
+language_model.model.layers.5.attention.rotary_emb
+language_model.model.layers.5.feed_forward
+language_model.model.layers.5.feed_forward.w1
+language_model.model.layers.5.feed_forward.w3
+language_model.model.layers.5.feed_forward.w2
+language_model.model.layers.5.feed_forward.act_fn
+language_model.model.layers.5.attention_norm
+language_model.model.layers.5.ffn_norm
+language_model.model.layers.6
+language_model.model.layers.6.attention
+language_model.model.layers.6.attention.wqkv
+language_model.model.layers.6.attention.wo
+language_model.model.layers.6.attention.rotary_emb
+language_model.model.layers.6.feed_forward
+language_model.model.layers.6.feed_forward.w1
+language_model.model.layers.6.feed_forward.w3
+language_model.model.layers.6.feed_forward.w2
+language_model.model.layers.6.feed_forward.act_fn
+language_model.model.layers.6.attention_norm
+language_model.model.layers.6.ffn_norm
+language_model.model.layers.7
+language_model.model.layers.7.attention
+language_model.model.layers.7.attention.wqkv
+language_model.model.layers.7.attention.wo
+language_model.model.layers.7.attention.rotary_emb
+language_model.model.layers.7.feed_forward
+language_model.model.layers.7.feed_forward.w1
+language_model.model.layers.7.feed_forward.w3
+language_model.model.layers.7.feed_forward.w2
+language_model.model.layers.7.feed_forward.act_fn
+language_model.model.layers.7.attention_norm
+language_model.model.layers.7.ffn_norm
+language_model.model.layers.8
+language_model.model.layers.8.attention
+language_model.model.layers.8.attention.wqkv
+language_model.model.layers.8.attention.wo
+language_model.model.layers.8.attention.rotary_emb
+language_model.model.layers.8.feed_forward
+language_model.model.layers.8.feed_forward.w1
+language_model.model.layers.8.feed_forward.w3
+language_model.model.layers.8.feed_forward.w2
+language_model.model.layers.8.feed_forward.act_fn
+language_model.model.layers.8.attention_norm
+language_model.model.layers.8.ffn_norm
+language_model.model.layers.9
+language_model.model.layers.9.attention
+language_model.model.layers.9.attention.wqkv
+language_model.model.layers.9.attention.wo
+language_model.model.layers.9.attention.rotary_emb
+language_model.model.layers.9.feed_forward
+language_model.model.layers.9.feed_forward.w1
+language_model.model.layers.9.feed_forward.w3
+language_model.model.layers.9.feed_forward.w2
+language_model.model.layers.9.feed_forward.act_fn
+language_model.model.layers.9.attention_norm
+language_model.model.layers.9.ffn_norm
+language_model.model.layers.10
+language_model.model.layers.10.attention
+language_model.model.layers.10.attention.wqkv
+language_model.model.layers.10.attention.wo
+language_model.model.layers.10.attention.rotary_emb
+language_model.model.layers.10.feed_forward
+language_model.model.layers.10.feed_forward.w1
+language_model.model.layers.10.feed_forward.w3
+language_model.model.layers.10.feed_forward.w2
+language_model.model.layers.10.feed_forward.act_fn
+language_model.model.layers.10.attention_norm
+language_model.model.layers.10.ffn_norm
+language_model.model.layers.11
+language_model.model.layers.11.attention
+language_model.model.layers.11.attention.wqkv
+language_model.model.layers.11.attention.wo
+language_model.model.layers.11.attention.rotary_emb
+language_model.model.layers.11.feed_forward
+language_model.model.layers.11.feed_forward.w1
+language_model.model.layers.11.feed_forward.w3
+language_model.model.layers.11.feed_forward.w2
+language_model.model.layers.11.feed_forward.act_fn
+language_model.model.layers.11.attention_norm
+language_model.model.layers.11.ffn_norm
+language_model.model.layers.12
+language_model.model.layers.12.attention
+language_model.model.layers.12.attention.wqkv
+language_model.model.layers.12.attention.wo
+language_model.model.layers.12.attention.rotary_emb
+language_model.model.layers.12.feed_forward
+language_model.model.layers.12.feed_forward.w1
+language_model.model.layers.12.feed_forward.w3
+language_model.model.layers.12.feed_forward.w2
+language_model.model.layers.12.feed_forward.act_fn
+language_model.model.layers.12.attention_norm
+language_model.model.layers.12.ffn_norm
+language_model.model.layers.13
+language_model.model.layers.13.attention
+language_model.model.layers.13.attention.wqkv
+language_model.model.layers.13.attention.wo
+language_model.model.layers.13.attention.rotary_emb
+language_model.model.layers.13.feed_forward
+language_model.model.layers.13.feed_forward.w1
+language_model.model.layers.13.feed_forward.w3
+language_model.model.layers.13.feed_forward.w2
+language_model.model.layers.13.feed_forward.act_fn
+language_model.model.layers.13.attention_norm
+language_model.model.layers.13.ffn_norm
+language_model.model.layers.14
+language_model.model.layers.14.attention
+language_model.model.layers.14.attention.wqkv
+language_model.model.layers.14.attention.wo
+language_model.model.layers.14.attention.rotary_emb
+language_model.model.layers.14.feed_forward
+language_model.model.layers.14.feed_forward.w1
+language_model.model.layers.14.feed_forward.w3
+language_model.model.layers.14.feed_forward.w2
+language_model.model.layers.14.feed_forward.act_fn
+language_model.model.layers.14.attention_norm
+language_model.model.layers.14.ffn_norm
+language_model.model.layers.15
+language_model.model.layers.15.attention
+language_model.model.layers.15.attention.wqkv
+language_model.model.layers.15.attention.wo
+language_model.model.layers.15.attention.rotary_emb
+language_model.model.layers.15.feed_forward
+language_model.model.layers.15.feed_forward.w1
+language_model.model.layers.15.feed_forward.w3
+language_model.model.layers.15.feed_forward.w2
+language_model.model.layers.15.feed_forward.act_fn
+language_model.model.layers.15.attention_norm
+language_model.model.layers.15.ffn_norm
+language_model.model.layers.16
+language_model.model.layers.16.attention
+language_model.model.layers.16.attention.wqkv
+language_model.model.layers.16.attention.wo
+language_model.model.layers.16.attention.rotary_emb
+language_model.model.layers.16.feed_forward
+language_model.model.layers.16.feed_forward.w1
+language_model.model.layers.16.feed_forward.w3
+language_model.model.layers.16.feed_forward.w2
+language_model.model.layers.16.feed_forward.act_fn
+language_model.model.layers.16.attention_norm
+language_model.model.layers.16.ffn_norm
+language_model.model.layers.17
+language_model.model.layers.17.attention
+language_model.model.layers.17.attention.wqkv
+language_model.model.layers.17.attention.wo
+language_model.model.layers.17.attention.rotary_emb
+language_model.model.layers.17.feed_forward
+language_model.model.layers.17.feed_forward.w1
+language_model.model.layers.17.feed_forward.w3
+language_model.model.layers.17.feed_forward.w2
+language_model.model.layers.17.feed_forward.act_fn
+language_model.model.layers.17.attention_norm
+language_model.model.layers.17.ffn_norm
+language_model.model.layers.18
+language_model.model.layers.18.attention
+language_model.model.layers.18.attention.wqkv
+language_model.model.layers.18.attention.wo
+language_model.model.layers.18.attention.rotary_emb
+language_model.model.layers.18.feed_forward
+language_model.model.layers.18.feed_forward.w1
+language_model.model.layers.18.feed_forward.w3
+language_model.model.layers.18.feed_forward.w2
+language_model.model.layers.18.feed_forward.act_fn
+language_model.model.layers.18.attention_norm
+language_model.model.layers.18.ffn_norm
+language_model.model.layers.19
+language_model.model.layers.19.attention
+language_model.model.layers.19.attention.wqkv
+language_model.model.layers.19.attention.wo
+language_model.model.layers.19.attention.rotary_emb
+language_model.model.layers.19.feed_forward
+language_model.model.layers.19.feed_forward.w1
+language_model.model.layers.19.feed_forward.w3
+language_model.model.layers.19.feed_forward.w2
+language_model.model.layers.19.feed_forward.act_fn
+language_model.model.layers.19.attention_norm
+language_model.model.layers.19.ffn_norm
+language_model.model.layers.20
+language_model.model.layers.20.attention
+language_model.model.layers.20.attention.wqkv
+language_model.model.layers.20.attention.wo
+language_model.model.layers.20.attention.rotary_emb
+language_model.model.layers.20.feed_forward
+language_model.model.layers.20.feed_forward.w1
+language_model.model.layers.20.feed_forward.w3
+language_model.model.layers.20.feed_forward.w2
+language_model.model.layers.20.feed_forward.act_fn
+language_model.model.layers.20.attention_norm
+language_model.model.layers.20.ffn_norm
+language_model.model.layers.21
+language_model.model.layers.21.attention
+language_model.model.layers.21.attention.wqkv
+language_model.model.layers.21.attention.wo
+language_model.model.layers.21.attention.rotary_emb
+language_model.model.layers.21.feed_forward
+language_model.model.layers.21.feed_forward.w1
+language_model.model.layers.21.feed_forward.w3
+language_model.model.layers.21.feed_forward.w2
+language_model.model.layers.21.feed_forward.act_fn
+language_model.model.layers.21.attention_norm
+language_model.model.layers.21.ffn_norm
+language_model.model.layers.22
+language_model.model.layers.22.attention
+language_model.model.layers.22.attention.wqkv
+language_model.model.layers.22.attention.wo
+language_model.model.layers.22.attention.rotary_emb
+language_model.model.layers.22.feed_forward
+language_model.model.layers.22.feed_forward.w1
+language_model.model.layers.22.feed_forward.w3
+language_model.model.layers.22.feed_forward.w2
+language_model.model.layers.22.feed_forward.act_fn
+language_model.model.layers.22.attention_norm
+language_model.model.layers.22.ffn_norm
+language_model.model.layers.23
+language_model.model.layers.23.attention
+language_model.model.layers.23.attention.wqkv
+language_model.model.layers.23.attention.wo
+language_model.model.layers.23.attention.rotary_emb
+language_model.model.layers.23.feed_forward
+language_model.model.layers.23.feed_forward.w1
+language_model.model.layers.23.feed_forward.w3
+language_model.model.layers.23.feed_forward.w2
+language_model.model.layers.23.feed_forward.act_fn
+language_model.model.layers.23.attention_norm
+language_model.model.layers.23.ffn_norm
+language_model.model.layers.24
+language_model.model.layers.24.attention
+language_model.model.layers.24.attention.wqkv
+language_model.model.layers.24.attention.wo
+language_model.model.layers.24.attention.rotary_emb
+language_model.model.layers.24.feed_forward
+language_model.model.layers.24.feed_forward.w1
+language_model.model.layers.24.feed_forward.w3
+language_model.model.layers.24.feed_forward.w2
+language_model.model.layers.24.feed_forward.act_fn
+language_model.model.layers.24.attention_norm
+language_model.model.layers.24.ffn_norm
+language_model.model.layers.25
+language_model.model.layers.25.attention
+language_model.model.layers.25.attention.wqkv
+language_model.model.layers.25.attention.wo
+language_model.model.layers.25.attention.rotary_emb
+language_model.model.layers.25.feed_forward
+language_model.model.layers.25.feed_forward.w1
+language_model.model.layers.25.feed_forward.w3
+language_model.model.layers.25.feed_forward.w2
+language_model.model.layers.25.feed_forward.act_fn
+language_model.model.layers.25.attention_norm
+language_model.model.layers.25.ffn_norm
+language_model.model.layers.26
+language_model.model.layers.26.attention
+language_model.model.layers.26.attention.wqkv
+language_model.model.layers.26.attention.wo
+language_model.model.layers.26.attention.rotary_emb
+language_model.model.layers.26.feed_forward
+language_model.model.layers.26.feed_forward.w1
+language_model.model.layers.26.feed_forward.w3
+language_model.model.layers.26.feed_forward.w2
+language_model.model.layers.26.feed_forward.act_fn
+language_model.model.layers.26.attention_norm
+language_model.model.layers.26.ffn_norm
+language_model.model.layers.27
+language_model.model.layers.27.attention
+language_model.model.layers.27.attention.wqkv
+language_model.model.layers.27.attention.wo
+language_model.model.layers.27.attention.rotary_emb
+language_model.model.layers.27.feed_forward
+language_model.model.layers.27.feed_forward.w1
+language_model.model.layers.27.feed_forward.w3
+language_model.model.layers.27.feed_forward.w2
+language_model.model.layers.27.feed_forward.act_fn
+language_model.model.layers.27.attention_norm
+language_model.model.layers.27.ffn_norm
+language_model.model.layers.28
+language_model.model.layers.28.attention
+language_model.model.layers.28.attention.wqkv
+language_model.model.layers.28.attention.wo
+language_model.model.layers.28.attention.rotary_emb
+language_model.model.layers.28.feed_forward
+language_model.model.layers.28.feed_forward.w1
+language_model.model.layers.28.feed_forward.w3
+language_model.model.layers.28.feed_forward.w2
+language_model.model.layers.28.feed_forward.act_fn
+language_model.model.layers.28.attention_norm
+language_model.model.layers.28.ffn_norm
+language_model.model.layers.29
+language_model.model.layers.29.attention
+language_model.model.layers.29.attention.wqkv
+language_model.model.layers.29.attention.wo
+language_model.model.layers.29.attention.rotary_emb
+language_model.model.layers.29.feed_forward
+language_model.model.layers.29.feed_forward.w1
+language_model.model.layers.29.feed_forward.w3
+language_model.model.layers.29.feed_forward.w2
+language_model.model.layers.29.feed_forward.act_fn
+language_model.model.layers.29.attention_norm
+language_model.model.layers.29.ffn_norm
+language_model.model.layers.30
+language_model.model.layers.30.attention
+language_model.model.layers.30.attention.wqkv
+language_model.model.layers.30.attention.wo
+language_model.model.layers.30.attention.rotary_emb
+language_model.model.layers.30.feed_forward
+language_model.model.layers.30.feed_forward.w1
+language_model.model.layers.30.feed_forward.w3
+language_model.model.layers.30.feed_forward.w2
+language_model.model.layers.30.feed_forward.act_fn
+language_model.model.layers.30.attention_norm
+language_model.model.layers.30.ffn_norm
+language_model.model.layers.31
+language_model.model.layers.31.attention
+language_model.model.layers.31.attention.wqkv
+language_model.model.layers.31.attention.wo
+language_model.model.layers.31.attention.rotary_emb
+language_model.model.layers.31.feed_forward
+language_model.model.layers.31.feed_forward.w1
+language_model.model.layers.31.feed_forward.w3
+language_model.model.layers.31.feed_forward.w2
+language_model.model.layers.31.feed_forward.act_fn
+language_model.model.layers.31.attention_norm
+language_model.model.layers.31.ffn_norm
+language_model.model.norm
+language_model.output
+mlp1
+mlp1.0
+mlp1.1
+mlp1.2
+mlp1.3
diff --git a/logs/llava-hf/llava-1.5-7b-hf.txt b/logs/llava-hf/llava-1.5-7b-hf.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e5cbc6c3271d8275e8cdd364914f3491d100fb85
--- /dev/null
+++ b/logs/llava-hf/llava-1.5-7b-hf.txt
@@ -0,0 +1,725 @@
+
+vision_tower
+vision_tower.vision_model
+vision_tower.vision_model.embeddings
+vision_tower.vision_model.embeddings.patch_embedding
+vision_tower.vision_model.embeddings.position_embedding
+vision_tower.vision_model.pre_layrnorm
+vision_tower.vision_model.encoder
+vision_tower.vision_model.encoder.layers
+vision_tower.vision_model.encoder.layers.0
+vision_tower.vision_model.encoder.layers.0.self_attn
+vision_tower.vision_model.encoder.layers.0.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.0.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.0.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.0.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.0.layer_norm1
+vision_tower.vision_model.encoder.layers.0.mlp
+vision_tower.vision_model.encoder.layers.0.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.0.mlp.fc1
+vision_tower.vision_model.encoder.layers.0.mlp.fc2
+vision_tower.vision_model.encoder.layers.0.layer_norm2
+vision_tower.vision_model.encoder.layers.1
+vision_tower.vision_model.encoder.layers.1.self_attn
+vision_tower.vision_model.encoder.layers.1.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.1.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.1.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.1.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.1.layer_norm1
+vision_tower.vision_model.encoder.layers.1.mlp
+vision_tower.vision_model.encoder.layers.1.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.1.mlp.fc1
+vision_tower.vision_model.encoder.layers.1.mlp.fc2
+vision_tower.vision_model.encoder.layers.1.layer_norm2
+vision_tower.vision_model.encoder.layers.2
+vision_tower.vision_model.encoder.layers.2.self_attn
+vision_tower.vision_model.encoder.layers.2.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.2.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.2.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.2.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.2.layer_norm1
+vision_tower.vision_model.encoder.layers.2.mlp
+vision_tower.vision_model.encoder.layers.2.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.2.mlp.fc1
+vision_tower.vision_model.encoder.layers.2.mlp.fc2
+vision_tower.vision_model.encoder.layers.2.layer_norm2
+vision_tower.vision_model.encoder.layers.3
+vision_tower.vision_model.encoder.layers.3.self_attn
+vision_tower.vision_model.encoder.layers.3.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.3.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.3.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.3.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.3.layer_norm1
+vision_tower.vision_model.encoder.layers.3.mlp
+vision_tower.vision_model.encoder.layers.3.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.3.mlp.fc1
+vision_tower.vision_model.encoder.layers.3.mlp.fc2
+vision_tower.vision_model.encoder.layers.3.layer_norm2
+vision_tower.vision_model.encoder.layers.4
+vision_tower.vision_model.encoder.layers.4.self_attn
+vision_tower.vision_model.encoder.layers.4.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.4.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.4.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.4.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.4.layer_norm1
+vision_tower.vision_model.encoder.layers.4.mlp
+vision_tower.vision_model.encoder.layers.4.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.4.mlp.fc1
+vision_tower.vision_model.encoder.layers.4.mlp.fc2
+vision_tower.vision_model.encoder.layers.4.layer_norm2
+vision_tower.vision_model.encoder.layers.5
+vision_tower.vision_model.encoder.layers.5.self_attn
+vision_tower.vision_model.encoder.layers.5.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.5.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.5.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.5.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.5.layer_norm1
+vision_tower.vision_model.encoder.layers.5.mlp
+vision_tower.vision_model.encoder.layers.5.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.5.mlp.fc1
+vision_tower.vision_model.encoder.layers.5.mlp.fc2
+vision_tower.vision_model.encoder.layers.5.layer_norm2
+vision_tower.vision_model.encoder.layers.6
+vision_tower.vision_model.encoder.layers.6.self_attn
+vision_tower.vision_model.encoder.layers.6.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.6.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.6.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.6.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.6.layer_norm1
+vision_tower.vision_model.encoder.layers.6.mlp
+vision_tower.vision_model.encoder.layers.6.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.6.mlp.fc1
+vision_tower.vision_model.encoder.layers.6.mlp.fc2
+vision_tower.vision_model.encoder.layers.6.layer_norm2
+vision_tower.vision_model.encoder.layers.7
+vision_tower.vision_model.encoder.layers.7.self_attn
+vision_tower.vision_model.encoder.layers.7.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.7.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.7.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.7.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.7.layer_norm1
+vision_tower.vision_model.encoder.layers.7.mlp
+vision_tower.vision_model.encoder.layers.7.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.7.mlp.fc1
+vision_tower.vision_model.encoder.layers.7.mlp.fc2
+vision_tower.vision_model.encoder.layers.7.layer_norm2
+vision_tower.vision_model.encoder.layers.8
+vision_tower.vision_model.encoder.layers.8.self_attn
+vision_tower.vision_model.encoder.layers.8.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.8.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.8.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.8.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.8.layer_norm1
+vision_tower.vision_model.encoder.layers.8.mlp
+vision_tower.vision_model.encoder.layers.8.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.8.mlp.fc1
+vision_tower.vision_model.encoder.layers.8.mlp.fc2
+vision_tower.vision_model.encoder.layers.8.layer_norm2
+vision_tower.vision_model.encoder.layers.9
+vision_tower.vision_model.encoder.layers.9.self_attn
+vision_tower.vision_model.encoder.layers.9.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.9.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.9.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.9.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.9.layer_norm1
+vision_tower.vision_model.encoder.layers.9.mlp
+vision_tower.vision_model.encoder.layers.9.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.9.mlp.fc1
+vision_tower.vision_model.encoder.layers.9.mlp.fc2
+vision_tower.vision_model.encoder.layers.9.layer_norm2
+vision_tower.vision_model.encoder.layers.10
+vision_tower.vision_model.encoder.layers.10.self_attn
+vision_tower.vision_model.encoder.layers.10.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.10.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.10.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.10.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.10.layer_norm1
+vision_tower.vision_model.encoder.layers.10.mlp
+vision_tower.vision_model.encoder.layers.10.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.10.mlp.fc1
+vision_tower.vision_model.encoder.layers.10.mlp.fc2
+vision_tower.vision_model.encoder.layers.10.layer_norm2
+vision_tower.vision_model.encoder.layers.11
+vision_tower.vision_model.encoder.layers.11.self_attn
+vision_tower.vision_model.encoder.layers.11.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.11.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.11.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.11.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.11.layer_norm1
+vision_tower.vision_model.encoder.layers.11.mlp
+vision_tower.vision_model.encoder.layers.11.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.11.mlp.fc1
+vision_tower.vision_model.encoder.layers.11.mlp.fc2
+vision_tower.vision_model.encoder.layers.11.layer_norm2
+vision_tower.vision_model.encoder.layers.12
+vision_tower.vision_model.encoder.layers.12.self_attn
+vision_tower.vision_model.encoder.layers.12.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.12.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.12.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.12.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.12.layer_norm1
+vision_tower.vision_model.encoder.layers.12.mlp
+vision_tower.vision_model.encoder.layers.12.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.12.mlp.fc1
+vision_tower.vision_model.encoder.layers.12.mlp.fc2
+vision_tower.vision_model.encoder.layers.12.layer_norm2
+vision_tower.vision_model.encoder.layers.13
+vision_tower.vision_model.encoder.layers.13.self_attn
+vision_tower.vision_model.encoder.layers.13.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.13.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.13.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.13.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.13.layer_norm1
+vision_tower.vision_model.encoder.layers.13.mlp
+vision_tower.vision_model.encoder.layers.13.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.13.mlp.fc1
+vision_tower.vision_model.encoder.layers.13.mlp.fc2
+vision_tower.vision_model.encoder.layers.13.layer_norm2
+vision_tower.vision_model.encoder.layers.14
+vision_tower.vision_model.encoder.layers.14.self_attn
+vision_tower.vision_model.encoder.layers.14.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.14.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.14.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.14.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.14.layer_norm1
+vision_tower.vision_model.encoder.layers.14.mlp
+vision_tower.vision_model.encoder.layers.14.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.14.mlp.fc1
+vision_tower.vision_model.encoder.layers.14.mlp.fc2
+vision_tower.vision_model.encoder.layers.14.layer_norm2
+vision_tower.vision_model.encoder.layers.15
+vision_tower.vision_model.encoder.layers.15.self_attn
+vision_tower.vision_model.encoder.layers.15.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.15.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.15.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.15.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.15.layer_norm1
+vision_tower.vision_model.encoder.layers.15.mlp
+vision_tower.vision_model.encoder.layers.15.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.15.mlp.fc1
+vision_tower.vision_model.encoder.layers.15.mlp.fc2
+vision_tower.vision_model.encoder.layers.15.layer_norm2
+vision_tower.vision_model.encoder.layers.16
+vision_tower.vision_model.encoder.layers.16.self_attn
+vision_tower.vision_model.encoder.layers.16.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.16.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.16.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.16.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.16.layer_norm1
+vision_tower.vision_model.encoder.layers.16.mlp
+vision_tower.vision_model.encoder.layers.16.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.16.mlp.fc1
+vision_tower.vision_model.encoder.layers.16.mlp.fc2
+vision_tower.vision_model.encoder.layers.16.layer_norm2
+vision_tower.vision_model.encoder.layers.17
+vision_tower.vision_model.encoder.layers.17.self_attn
+vision_tower.vision_model.encoder.layers.17.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.17.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.17.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.17.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.17.layer_norm1
+vision_tower.vision_model.encoder.layers.17.mlp
+vision_tower.vision_model.encoder.layers.17.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.17.mlp.fc1
+vision_tower.vision_model.encoder.layers.17.mlp.fc2
+vision_tower.vision_model.encoder.layers.17.layer_norm2
+vision_tower.vision_model.encoder.layers.18
+vision_tower.vision_model.encoder.layers.18.self_attn
+vision_tower.vision_model.encoder.layers.18.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.18.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.18.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.18.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.18.layer_norm1
+vision_tower.vision_model.encoder.layers.18.mlp
+vision_tower.vision_model.encoder.layers.18.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.18.mlp.fc1
+vision_tower.vision_model.encoder.layers.18.mlp.fc2
+vision_tower.vision_model.encoder.layers.18.layer_norm2
+vision_tower.vision_model.encoder.layers.19
+vision_tower.vision_model.encoder.layers.19.self_attn
+vision_tower.vision_model.encoder.layers.19.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.19.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.19.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.19.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.19.layer_norm1
+vision_tower.vision_model.encoder.layers.19.mlp
+vision_tower.vision_model.encoder.layers.19.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.19.mlp.fc1
+vision_tower.vision_model.encoder.layers.19.mlp.fc2
+vision_tower.vision_model.encoder.layers.19.layer_norm2
+vision_tower.vision_model.encoder.layers.20
+vision_tower.vision_model.encoder.layers.20.self_attn
+vision_tower.vision_model.encoder.layers.20.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.20.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.20.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.20.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.20.layer_norm1
+vision_tower.vision_model.encoder.layers.20.mlp
+vision_tower.vision_model.encoder.layers.20.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.20.mlp.fc1
+vision_tower.vision_model.encoder.layers.20.mlp.fc2
+vision_tower.vision_model.encoder.layers.20.layer_norm2
+vision_tower.vision_model.encoder.layers.21
+vision_tower.vision_model.encoder.layers.21.self_attn
+vision_tower.vision_model.encoder.layers.21.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.21.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.21.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.21.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.21.layer_norm1
+vision_tower.vision_model.encoder.layers.21.mlp
+vision_tower.vision_model.encoder.layers.21.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.21.mlp.fc1
+vision_tower.vision_model.encoder.layers.21.mlp.fc2
+vision_tower.vision_model.encoder.layers.21.layer_norm2
+vision_tower.vision_model.encoder.layers.22
+vision_tower.vision_model.encoder.layers.22.self_attn
+vision_tower.vision_model.encoder.layers.22.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.22.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.22.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.22.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.22.layer_norm1
+vision_tower.vision_model.encoder.layers.22.mlp
+vision_tower.vision_model.encoder.layers.22.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.22.mlp.fc1
+vision_tower.vision_model.encoder.layers.22.mlp.fc2
+vision_tower.vision_model.encoder.layers.22.layer_norm2
+vision_tower.vision_model.encoder.layers.23
+vision_tower.vision_model.encoder.layers.23.self_attn
+vision_tower.vision_model.encoder.layers.23.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.23.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.23.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.23.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.23.layer_norm1
+vision_tower.vision_model.encoder.layers.23.mlp
+vision_tower.vision_model.encoder.layers.23.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.23.mlp.fc1
+vision_tower.vision_model.encoder.layers.23.mlp.fc2
+vision_tower.vision_model.encoder.layers.23.layer_norm2
+vision_tower.vision_model.post_layernorm
+multi_modal_projector
+multi_modal_projector.linear_1
+multi_modal_projector.act
+multi_modal_projector.linear_2
+language_model
+language_model.model
+language_model.model.embed_tokens
+language_model.model.layers
+language_model.model.layers.0
+language_model.model.layers.0.self_attn
+language_model.model.layers.0.self_attn.q_proj
+language_model.model.layers.0.self_attn.k_proj
+language_model.model.layers.0.self_attn.v_proj
+language_model.model.layers.0.self_attn.o_proj
+language_model.model.layers.0.mlp
+language_model.model.layers.0.mlp.gate_proj
+language_model.model.layers.0.mlp.up_proj
+language_model.model.layers.0.mlp.down_proj
+language_model.model.layers.0.mlp.act_fn
+language_model.model.layers.0.input_layernorm
+language_model.model.layers.0.post_attention_layernorm
+language_model.model.layers.1
+language_model.model.layers.1.self_attn
+language_model.model.layers.1.self_attn.q_proj
+language_model.model.layers.1.self_attn.k_proj
+language_model.model.layers.1.self_attn.v_proj
+language_model.model.layers.1.self_attn.o_proj
+language_model.model.layers.1.mlp
+language_model.model.layers.1.mlp.gate_proj
+language_model.model.layers.1.mlp.up_proj
+language_model.model.layers.1.mlp.down_proj
+language_model.model.layers.1.mlp.act_fn
+language_model.model.layers.1.input_layernorm
+language_model.model.layers.1.post_attention_layernorm
+language_model.model.layers.2
+language_model.model.layers.2.self_attn
+language_model.model.layers.2.self_attn.q_proj
+language_model.model.layers.2.self_attn.k_proj
+language_model.model.layers.2.self_attn.v_proj
+language_model.model.layers.2.self_attn.o_proj
+language_model.model.layers.2.mlp
+language_model.model.layers.2.mlp.gate_proj
+language_model.model.layers.2.mlp.up_proj
+language_model.model.layers.2.mlp.down_proj
+language_model.model.layers.2.mlp.act_fn
+language_model.model.layers.2.input_layernorm
+language_model.model.layers.2.post_attention_layernorm
+language_model.model.layers.3
+language_model.model.layers.3.self_attn
+language_model.model.layers.3.self_attn.q_proj
+language_model.model.layers.3.self_attn.k_proj
+language_model.model.layers.3.self_attn.v_proj
+language_model.model.layers.3.self_attn.o_proj
+language_model.model.layers.3.mlp
+language_model.model.layers.3.mlp.gate_proj
+language_model.model.layers.3.mlp.up_proj
+language_model.model.layers.3.mlp.down_proj
+language_model.model.layers.3.mlp.act_fn
+language_model.model.layers.3.input_layernorm
+language_model.model.layers.3.post_attention_layernorm
+language_model.model.layers.4
+language_model.model.layers.4.self_attn
+language_model.model.layers.4.self_attn.q_proj
+language_model.model.layers.4.self_attn.k_proj
+language_model.model.layers.4.self_attn.v_proj
+language_model.model.layers.4.self_attn.o_proj
+language_model.model.layers.4.mlp
+language_model.model.layers.4.mlp.gate_proj
+language_model.model.layers.4.mlp.up_proj
+language_model.model.layers.4.mlp.down_proj
+language_model.model.layers.4.mlp.act_fn
+language_model.model.layers.4.input_layernorm
+language_model.model.layers.4.post_attention_layernorm
+language_model.model.layers.5
+language_model.model.layers.5.self_attn
+language_model.model.layers.5.self_attn.q_proj
+language_model.model.layers.5.self_attn.k_proj
+language_model.model.layers.5.self_attn.v_proj
+language_model.model.layers.5.self_attn.o_proj
+language_model.model.layers.5.mlp
+language_model.model.layers.5.mlp.gate_proj
+language_model.model.layers.5.mlp.up_proj
+language_model.model.layers.5.mlp.down_proj
+language_model.model.layers.5.mlp.act_fn
+language_model.model.layers.5.input_layernorm
+language_model.model.layers.5.post_attention_layernorm
+language_model.model.layers.6
+language_model.model.layers.6.self_attn
+language_model.model.layers.6.self_attn.q_proj
+language_model.model.layers.6.self_attn.k_proj
+language_model.model.layers.6.self_attn.v_proj
+language_model.model.layers.6.self_attn.o_proj
+language_model.model.layers.6.mlp
+language_model.model.layers.6.mlp.gate_proj
+language_model.model.layers.6.mlp.up_proj
+language_model.model.layers.6.mlp.down_proj
+language_model.model.layers.6.mlp.act_fn
+language_model.model.layers.6.input_layernorm
+language_model.model.layers.6.post_attention_layernorm
+language_model.model.layers.7
+language_model.model.layers.7.self_attn
+language_model.model.layers.7.self_attn.q_proj
+language_model.model.layers.7.self_attn.k_proj
+language_model.model.layers.7.self_attn.v_proj
+language_model.model.layers.7.self_attn.o_proj
+language_model.model.layers.7.mlp
+language_model.model.layers.7.mlp.gate_proj
+language_model.model.layers.7.mlp.up_proj
+language_model.model.layers.7.mlp.down_proj
+language_model.model.layers.7.mlp.act_fn
+language_model.model.layers.7.input_layernorm
+language_model.model.layers.7.post_attention_layernorm
+language_model.model.layers.8
+language_model.model.layers.8.self_attn
+language_model.model.layers.8.self_attn.q_proj
+language_model.model.layers.8.self_attn.k_proj
+language_model.model.layers.8.self_attn.v_proj
+language_model.model.layers.8.self_attn.o_proj
+language_model.model.layers.8.mlp
+language_model.model.layers.8.mlp.gate_proj
+language_model.model.layers.8.mlp.up_proj
+language_model.model.layers.8.mlp.down_proj
+language_model.model.layers.8.mlp.act_fn
+language_model.model.layers.8.input_layernorm
+language_model.model.layers.8.post_attention_layernorm
+language_model.model.layers.9
+language_model.model.layers.9.self_attn
+language_model.model.layers.9.self_attn.q_proj
+language_model.model.layers.9.self_attn.k_proj
+language_model.model.layers.9.self_attn.v_proj
+language_model.model.layers.9.self_attn.o_proj
+language_model.model.layers.9.mlp
+language_model.model.layers.9.mlp.gate_proj
+language_model.model.layers.9.mlp.up_proj
+language_model.model.layers.9.mlp.down_proj
+language_model.model.layers.9.mlp.act_fn
+language_model.model.layers.9.input_layernorm
+language_model.model.layers.9.post_attention_layernorm
+language_model.model.layers.10
+language_model.model.layers.10.self_attn
+language_model.model.layers.10.self_attn.q_proj
+language_model.model.layers.10.self_attn.k_proj
+language_model.model.layers.10.self_attn.v_proj
+language_model.model.layers.10.self_attn.o_proj
+language_model.model.layers.10.mlp
+language_model.model.layers.10.mlp.gate_proj
+language_model.model.layers.10.mlp.up_proj
+language_model.model.layers.10.mlp.down_proj
+language_model.model.layers.10.mlp.act_fn
+language_model.model.layers.10.input_layernorm
+language_model.model.layers.10.post_attention_layernorm
+language_model.model.layers.11
+language_model.model.layers.11.self_attn
+language_model.model.layers.11.self_attn.q_proj
+language_model.model.layers.11.self_attn.k_proj
+language_model.model.layers.11.self_attn.v_proj
+language_model.model.layers.11.self_attn.o_proj
+language_model.model.layers.11.mlp
+language_model.model.layers.11.mlp.gate_proj
+language_model.model.layers.11.mlp.up_proj
+language_model.model.layers.11.mlp.down_proj
+language_model.model.layers.11.mlp.act_fn
+language_model.model.layers.11.input_layernorm
+language_model.model.layers.11.post_attention_layernorm
+language_model.model.layers.12
+language_model.model.layers.12.self_attn
+language_model.model.layers.12.self_attn.q_proj
+language_model.model.layers.12.self_attn.k_proj
+language_model.model.layers.12.self_attn.v_proj
+language_model.model.layers.12.self_attn.o_proj
+language_model.model.layers.12.mlp
+language_model.model.layers.12.mlp.gate_proj
+language_model.model.layers.12.mlp.up_proj
+language_model.model.layers.12.mlp.down_proj
+language_model.model.layers.12.mlp.act_fn
+language_model.model.layers.12.input_layernorm
+language_model.model.layers.12.post_attention_layernorm
+language_model.model.layers.13
+language_model.model.layers.13.self_attn
+language_model.model.layers.13.self_attn.q_proj
+language_model.model.layers.13.self_attn.k_proj
+language_model.model.layers.13.self_attn.v_proj
+language_model.model.layers.13.self_attn.o_proj
+language_model.model.layers.13.mlp
+language_model.model.layers.13.mlp.gate_proj
+language_model.model.layers.13.mlp.up_proj
+language_model.model.layers.13.mlp.down_proj
+language_model.model.layers.13.mlp.act_fn
+language_model.model.layers.13.input_layernorm
+language_model.model.layers.13.post_attention_layernorm
+language_model.model.layers.14
+language_model.model.layers.14.self_attn
+language_model.model.layers.14.self_attn.q_proj
+language_model.model.layers.14.self_attn.k_proj
+language_model.model.layers.14.self_attn.v_proj
+language_model.model.layers.14.self_attn.o_proj
+language_model.model.layers.14.mlp
+language_model.model.layers.14.mlp.gate_proj
+language_model.model.layers.14.mlp.up_proj
+language_model.model.layers.14.mlp.down_proj
+language_model.model.layers.14.mlp.act_fn
+language_model.model.layers.14.input_layernorm
+language_model.model.layers.14.post_attention_layernorm
+language_model.model.layers.15
+language_model.model.layers.15.self_attn
+language_model.model.layers.15.self_attn.q_proj
+language_model.model.layers.15.self_attn.k_proj
+language_model.model.layers.15.self_attn.v_proj
+language_model.model.layers.15.self_attn.o_proj
+language_model.model.layers.15.mlp
+language_model.model.layers.15.mlp.gate_proj
+language_model.model.layers.15.mlp.up_proj
+language_model.model.layers.15.mlp.down_proj
+language_model.model.layers.15.mlp.act_fn
+language_model.model.layers.15.input_layernorm
+language_model.model.layers.15.post_attention_layernorm
+language_model.model.layers.16
+language_model.model.layers.16.self_attn
+language_model.model.layers.16.self_attn.q_proj
+language_model.model.layers.16.self_attn.k_proj
+language_model.model.layers.16.self_attn.v_proj
+language_model.model.layers.16.self_attn.o_proj
+language_model.model.layers.16.mlp
+language_model.model.layers.16.mlp.gate_proj
+language_model.model.layers.16.mlp.up_proj
+language_model.model.layers.16.mlp.down_proj
+language_model.model.layers.16.mlp.act_fn
+language_model.model.layers.16.input_layernorm
+language_model.model.layers.16.post_attention_layernorm
+language_model.model.layers.17
+language_model.model.layers.17.self_attn
+language_model.model.layers.17.self_attn.q_proj
+language_model.model.layers.17.self_attn.k_proj
+language_model.model.layers.17.self_attn.v_proj
+language_model.model.layers.17.self_attn.o_proj
+language_model.model.layers.17.mlp
+language_model.model.layers.17.mlp.gate_proj
+language_model.model.layers.17.mlp.up_proj
+language_model.model.layers.17.mlp.down_proj
+language_model.model.layers.17.mlp.act_fn
+language_model.model.layers.17.input_layernorm
+language_model.model.layers.17.post_attention_layernorm
+language_model.model.layers.18
+language_model.model.layers.18.self_attn
+language_model.model.layers.18.self_attn.q_proj
+language_model.model.layers.18.self_attn.k_proj
+language_model.model.layers.18.self_attn.v_proj
+language_model.model.layers.18.self_attn.o_proj
+language_model.model.layers.18.mlp
+language_model.model.layers.18.mlp.gate_proj
+language_model.model.layers.18.mlp.up_proj
+language_model.model.layers.18.mlp.down_proj
+language_model.model.layers.18.mlp.act_fn
+language_model.model.layers.18.input_layernorm
+language_model.model.layers.18.post_attention_layernorm
+language_model.model.layers.19
+language_model.model.layers.19.self_attn
+language_model.model.layers.19.self_attn.q_proj
+language_model.model.layers.19.self_attn.k_proj
+language_model.model.layers.19.self_attn.v_proj
+language_model.model.layers.19.self_attn.o_proj
+language_model.model.layers.19.mlp
+language_model.model.layers.19.mlp.gate_proj
+language_model.model.layers.19.mlp.up_proj
+language_model.model.layers.19.mlp.down_proj
+language_model.model.layers.19.mlp.act_fn
+language_model.model.layers.19.input_layernorm
+language_model.model.layers.19.post_attention_layernorm
+language_model.model.layers.20
+language_model.model.layers.20.self_attn
+language_model.model.layers.20.self_attn.q_proj
+language_model.model.layers.20.self_attn.k_proj
+language_model.model.layers.20.self_attn.v_proj
+language_model.model.layers.20.self_attn.o_proj
+language_model.model.layers.20.mlp
+language_model.model.layers.20.mlp.gate_proj
+language_model.model.layers.20.mlp.up_proj
+language_model.model.layers.20.mlp.down_proj
+language_model.model.layers.20.mlp.act_fn
+language_model.model.layers.20.input_layernorm
+language_model.model.layers.20.post_attention_layernorm
+language_model.model.layers.21
+language_model.model.layers.21.self_attn
+language_model.model.layers.21.self_attn.q_proj
+language_model.model.layers.21.self_attn.k_proj
+language_model.model.layers.21.self_attn.v_proj
+language_model.model.layers.21.self_attn.o_proj
+language_model.model.layers.21.mlp
+language_model.model.layers.21.mlp.gate_proj
+language_model.model.layers.21.mlp.up_proj
+language_model.model.layers.21.mlp.down_proj
+language_model.model.layers.21.mlp.act_fn
+language_model.model.layers.21.input_layernorm
+language_model.model.layers.21.post_attention_layernorm
+language_model.model.layers.22
+language_model.model.layers.22.self_attn
+language_model.model.layers.22.self_attn.q_proj
+language_model.model.layers.22.self_attn.k_proj
+language_model.model.layers.22.self_attn.v_proj
+language_model.model.layers.22.self_attn.o_proj
+language_model.model.layers.22.mlp
+language_model.model.layers.22.mlp.gate_proj
+language_model.model.layers.22.mlp.up_proj
+language_model.model.layers.22.mlp.down_proj
+language_model.model.layers.22.mlp.act_fn
+language_model.model.layers.22.input_layernorm
+language_model.model.layers.22.post_attention_layernorm
+language_model.model.layers.23
+language_model.model.layers.23.self_attn
+language_model.model.layers.23.self_attn.q_proj
+language_model.model.layers.23.self_attn.k_proj
+language_model.model.layers.23.self_attn.v_proj
+language_model.model.layers.23.self_attn.o_proj
+language_model.model.layers.23.mlp
+language_model.model.layers.23.mlp.gate_proj
+language_model.model.layers.23.mlp.up_proj
+language_model.model.layers.23.mlp.down_proj
+language_model.model.layers.23.mlp.act_fn
+language_model.model.layers.23.input_layernorm
+language_model.model.layers.23.post_attention_layernorm
+language_model.model.layers.24
+language_model.model.layers.24.self_attn
+language_model.model.layers.24.self_attn.q_proj
+language_model.model.layers.24.self_attn.k_proj
+language_model.model.layers.24.self_attn.v_proj
+language_model.model.layers.24.self_attn.o_proj
+language_model.model.layers.24.mlp
+language_model.model.layers.24.mlp.gate_proj
+language_model.model.layers.24.mlp.up_proj
+language_model.model.layers.24.mlp.down_proj
+language_model.model.layers.24.mlp.act_fn
+language_model.model.layers.24.input_layernorm
+language_model.model.layers.24.post_attention_layernorm
+language_model.model.layers.25
+language_model.model.layers.25.self_attn
+language_model.model.layers.25.self_attn.q_proj
+language_model.model.layers.25.self_attn.k_proj
+language_model.model.layers.25.self_attn.v_proj
+language_model.model.layers.25.self_attn.o_proj
+language_model.model.layers.25.mlp
+language_model.model.layers.25.mlp.gate_proj
+language_model.model.layers.25.mlp.up_proj
+language_model.model.layers.25.mlp.down_proj
+language_model.model.layers.25.mlp.act_fn
+language_model.model.layers.25.input_layernorm
+language_model.model.layers.25.post_attention_layernorm
+language_model.model.layers.26
+language_model.model.layers.26.self_attn
+language_model.model.layers.26.self_attn.q_proj
+language_model.model.layers.26.self_attn.k_proj
+language_model.model.layers.26.self_attn.v_proj
+language_model.model.layers.26.self_attn.o_proj
+language_model.model.layers.26.mlp
+language_model.model.layers.26.mlp.gate_proj
+language_model.model.layers.26.mlp.up_proj
+language_model.model.layers.26.mlp.down_proj
+language_model.model.layers.26.mlp.act_fn
+language_model.model.layers.26.input_layernorm
+language_model.model.layers.26.post_attention_layernorm
+language_model.model.layers.27
+language_model.model.layers.27.self_attn
+language_model.model.layers.27.self_attn.q_proj
+language_model.model.layers.27.self_attn.k_proj
+language_model.model.layers.27.self_attn.v_proj
+language_model.model.layers.27.self_attn.o_proj
+language_model.model.layers.27.mlp
+language_model.model.layers.27.mlp.gate_proj
+language_model.model.layers.27.mlp.up_proj
+language_model.model.layers.27.mlp.down_proj
+language_model.model.layers.27.mlp.act_fn
+language_model.model.layers.27.input_layernorm
+language_model.model.layers.27.post_attention_layernorm
+language_model.model.layers.28
+language_model.model.layers.28.self_attn
+language_model.model.layers.28.self_attn.q_proj
+language_model.model.layers.28.self_attn.k_proj
+language_model.model.layers.28.self_attn.v_proj
+language_model.model.layers.28.self_attn.o_proj
+language_model.model.layers.28.mlp
+language_model.model.layers.28.mlp.gate_proj
+language_model.model.layers.28.mlp.up_proj
+language_model.model.layers.28.mlp.down_proj
+language_model.model.layers.28.mlp.act_fn
+language_model.model.layers.28.input_layernorm
+language_model.model.layers.28.post_attention_layernorm
+language_model.model.layers.29
+language_model.model.layers.29.self_attn
+language_model.model.layers.29.self_attn.q_proj
+language_model.model.layers.29.self_attn.k_proj
+language_model.model.layers.29.self_attn.v_proj
+language_model.model.layers.29.self_attn.o_proj
+language_model.model.layers.29.mlp
+language_model.model.layers.29.mlp.gate_proj
+language_model.model.layers.29.mlp.up_proj
+language_model.model.layers.29.mlp.down_proj
+language_model.model.layers.29.mlp.act_fn
+language_model.model.layers.29.input_layernorm
+language_model.model.layers.29.post_attention_layernorm
+language_model.model.layers.30
+language_model.model.layers.30.self_attn
+language_model.model.layers.30.self_attn.q_proj
+language_model.model.layers.30.self_attn.k_proj
+language_model.model.layers.30.self_attn.v_proj
+language_model.model.layers.30.self_attn.o_proj
+language_model.model.layers.30.mlp
+language_model.model.layers.30.mlp.gate_proj
+language_model.model.layers.30.mlp.up_proj
+language_model.model.layers.30.mlp.down_proj
+language_model.model.layers.30.mlp.act_fn
+language_model.model.layers.30.input_layernorm
+language_model.model.layers.30.post_attention_layernorm
+language_model.model.layers.31
+language_model.model.layers.31.self_attn
+language_model.model.layers.31.self_attn.q_proj
+language_model.model.layers.31.self_attn.k_proj
+language_model.model.layers.31.self_attn.v_proj
+language_model.model.layers.31.self_attn.o_proj
+language_model.model.layers.31.mlp
+language_model.model.layers.31.mlp.gate_proj
+language_model.model.layers.31.mlp.up_proj
+language_model.model.layers.31.mlp.down_proj
+language_model.model.layers.31.mlp.act_fn
+language_model.model.layers.31.input_layernorm
+language_model.model.layers.31.post_attention_layernorm
+language_model.model.norm
+language_model.model.rotary_emb
+language_model.lm_head
diff --git a/logs/mistralai/Pixtral-12B-2409.txt b/logs/mistralai/Pixtral-12B-2409.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8e75871519018bb1b7849fc88b5f0696d3c53a7b
--- /dev/null
+++ b/logs/mistralai/Pixtral-12B-2409.txt
@@ -0,0 +1,782 @@
+
+tok_embeddings
+vision_encoder
+vision_encoder.patch_conv
+vision_encoder.ln_pre
+vision_encoder.transformer
+vision_encoder.transformer.layers
+vision_encoder.transformer.layers.0
+vision_encoder.transformer.layers.0.attention
+vision_encoder.transformer.layers.0.attention.wq
+vision_encoder.transformer.layers.0.attention.wk
+vision_encoder.transformer.layers.0.attention.wv
+vision_encoder.transformer.layers.0.attention.wo
+vision_encoder.transformer.layers.0.attention_norm
+vision_encoder.transformer.layers.0.ffn_norm
+vision_encoder.transformer.layers.0.feed_forward
+vision_encoder.transformer.layers.0.feed_forward.w1
+vision_encoder.transformer.layers.0.feed_forward.w2
+vision_encoder.transformer.layers.0.feed_forward.w3
+vision_encoder.transformer.layers.1
+vision_encoder.transformer.layers.1.attention
+vision_encoder.transformer.layers.1.attention.wq
+vision_encoder.transformer.layers.1.attention.wk
+vision_encoder.transformer.layers.1.attention.wv
+vision_encoder.transformer.layers.1.attention.wo
+vision_encoder.transformer.layers.1.attention_norm
+vision_encoder.transformer.layers.1.ffn_norm
+vision_encoder.transformer.layers.1.feed_forward
+vision_encoder.transformer.layers.1.feed_forward.w1
+vision_encoder.transformer.layers.1.feed_forward.w2
+vision_encoder.transformer.layers.1.feed_forward.w3
+vision_encoder.transformer.layers.2
+vision_encoder.transformer.layers.2.attention
+vision_encoder.transformer.layers.2.attention.wq
+vision_encoder.transformer.layers.2.attention.wk
+vision_encoder.transformer.layers.2.attention.wv
+vision_encoder.transformer.layers.2.attention.wo
+vision_encoder.transformer.layers.2.attention_norm
+vision_encoder.transformer.layers.2.ffn_norm
+vision_encoder.transformer.layers.2.feed_forward
+vision_encoder.transformer.layers.2.feed_forward.w1
+vision_encoder.transformer.layers.2.feed_forward.w2
+vision_encoder.transformer.layers.2.feed_forward.w3
+vision_encoder.transformer.layers.3
+vision_encoder.transformer.layers.3.attention
+vision_encoder.transformer.layers.3.attention.wq
+vision_encoder.transformer.layers.3.attention.wk
+vision_encoder.transformer.layers.3.attention.wv
+vision_encoder.transformer.layers.3.attention.wo
+vision_encoder.transformer.layers.3.attention_norm
+vision_encoder.transformer.layers.3.ffn_norm
+vision_encoder.transformer.layers.3.feed_forward
+vision_encoder.transformer.layers.3.feed_forward.w1
+vision_encoder.transformer.layers.3.feed_forward.w2
+vision_encoder.transformer.layers.3.feed_forward.w3
+vision_encoder.transformer.layers.4
+vision_encoder.transformer.layers.4.attention
+vision_encoder.transformer.layers.4.attention.wq
+vision_encoder.transformer.layers.4.attention.wk
+vision_encoder.transformer.layers.4.attention.wv
+vision_encoder.transformer.layers.4.attention.wo
+vision_encoder.transformer.layers.4.attention_norm
+vision_encoder.transformer.layers.4.ffn_norm
+vision_encoder.transformer.layers.4.feed_forward
+vision_encoder.transformer.layers.4.feed_forward.w1
+vision_encoder.transformer.layers.4.feed_forward.w2
+vision_encoder.transformer.layers.4.feed_forward.w3
+vision_encoder.transformer.layers.5
+vision_encoder.transformer.layers.5.attention
+vision_encoder.transformer.layers.5.attention.wq
+vision_encoder.transformer.layers.5.attention.wk
+vision_encoder.transformer.layers.5.attention.wv
+vision_encoder.transformer.layers.5.attention.wo
+vision_encoder.transformer.layers.5.attention_norm
+vision_encoder.transformer.layers.5.ffn_norm
+vision_encoder.transformer.layers.5.feed_forward
+vision_encoder.transformer.layers.5.feed_forward.w1
+vision_encoder.transformer.layers.5.feed_forward.w2
+vision_encoder.transformer.layers.5.feed_forward.w3
+vision_encoder.transformer.layers.6
+vision_encoder.transformer.layers.6.attention
+vision_encoder.transformer.layers.6.attention.wq
+vision_encoder.transformer.layers.6.attention.wk
+vision_encoder.transformer.layers.6.attention.wv
+vision_encoder.transformer.layers.6.attention.wo
+vision_encoder.transformer.layers.6.attention_norm
+vision_encoder.transformer.layers.6.ffn_norm
+vision_encoder.transformer.layers.6.feed_forward
+vision_encoder.transformer.layers.6.feed_forward.w1
+vision_encoder.transformer.layers.6.feed_forward.w2
+vision_encoder.transformer.layers.6.feed_forward.w3
+vision_encoder.transformer.layers.7
+vision_encoder.transformer.layers.7.attention
+vision_encoder.transformer.layers.7.attention.wq
+vision_encoder.transformer.layers.7.attention.wk
+vision_encoder.transformer.layers.7.attention.wv
+vision_encoder.transformer.layers.7.attention.wo
+vision_encoder.transformer.layers.7.attention_norm
+vision_encoder.transformer.layers.7.ffn_norm
+vision_encoder.transformer.layers.7.feed_forward
+vision_encoder.transformer.layers.7.feed_forward.w1
+vision_encoder.transformer.layers.7.feed_forward.w2
+vision_encoder.transformer.layers.7.feed_forward.w3
+vision_encoder.transformer.layers.8
+vision_encoder.transformer.layers.8.attention
+vision_encoder.transformer.layers.8.attention.wq
+vision_encoder.transformer.layers.8.attention.wk
+vision_encoder.transformer.layers.8.attention.wv
+vision_encoder.transformer.layers.8.attention.wo
+vision_encoder.transformer.layers.8.attention_norm
+vision_encoder.transformer.layers.8.ffn_norm
+vision_encoder.transformer.layers.8.feed_forward
+vision_encoder.transformer.layers.8.feed_forward.w1
+vision_encoder.transformer.layers.8.feed_forward.w2
+vision_encoder.transformer.layers.8.feed_forward.w3
+vision_encoder.transformer.layers.9
+vision_encoder.transformer.layers.9.attention
+vision_encoder.transformer.layers.9.attention.wq
+vision_encoder.transformer.layers.9.attention.wk
+vision_encoder.transformer.layers.9.attention.wv
+vision_encoder.transformer.layers.9.attention.wo
+vision_encoder.transformer.layers.9.attention_norm
+vision_encoder.transformer.layers.9.ffn_norm
+vision_encoder.transformer.layers.9.feed_forward
+vision_encoder.transformer.layers.9.feed_forward.w1
+vision_encoder.transformer.layers.9.feed_forward.w2
+vision_encoder.transformer.layers.9.feed_forward.w3
+vision_encoder.transformer.layers.10
+vision_encoder.transformer.layers.10.attention
+vision_encoder.transformer.layers.10.attention.wq
+vision_encoder.transformer.layers.10.attention.wk
+vision_encoder.transformer.layers.10.attention.wv
+vision_encoder.transformer.layers.10.attention.wo
+vision_encoder.transformer.layers.10.attention_norm
+vision_encoder.transformer.layers.10.ffn_norm
+vision_encoder.transformer.layers.10.feed_forward
+vision_encoder.transformer.layers.10.feed_forward.w1
+vision_encoder.transformer.layers.10.feed_forward.w2
+vision_encoder.transformer.layers.10.feed_forward.w3
+vision_encoder.transformer.layers.11
+vision_encoder.transformer.layers.11.attention
+vision_encoder.transformer.layers.11.attention.wq
+vision_encoder.transformer.layers.11.attention.wk
+vision_encoder.transformer.layers.11.attention.wv
+vision_encoder.transformer.layers.11.attention.wo
+vision_encoder.transformer.layers.11.attention_norm
+vision_encoder.transformer.layers.11.ffn_norm
+vision_encoder.transformer.layers.11.feed_forward
+vision_encoder.transformer.layers.11.feed_forward.w1
+vision_encoder.transformer.layers.11.feed_forward.w2
+vision_encoder.transformer.layers.11.feed_forward.w3
+vision_encoder.transformer.layers.12
+vision_encoder.transformer.layers.12.attention
+vision_encoder.transformer.layers.12.attention.wq
+vision_encoder.transformer.layers.12.attention.wk
+vision_encoder.transformer.layers.12.attention.wv
+vision_encoder.transformer.layers.12.attention.wo
+vision_encoder.transformer.layers.12.attention_norm
+vision_encoder.transformer.layers.12.ffn_norm
+vision_encoder.transformer.layers.12.feed_forward
+vision_encoder.transformer.layers.12.feed_forward.w1
+vision_encoder.transformer.layers.12.feed_forward.w2
+vision_encoder.transformer.layers.12.feed_forward.w3
+vision_encoder.transformer.layers.13
+vision_encoder.transformer.layers.13.attention
+vision_encoder.transformer.layers.13.attention.wq
+vision_encoder.transformer.layers.13.attention.wk
+vision_encoder.transformer.layers.13.attention.wv
+vision_encoder.transformer.layers.13.attention.wo
+vision_encoder.transformer.layers.13.attention_norm
+vision_encoder.transformer.layers.13.ffn_norm
+vision_encoder.transformer.layers.13.feed_forward
+vision_encoder.transformer.layers.13.feed_forward.w1
+vision_encoder.transformer.layers.13.feed_forward.w2
+vision_encoder.transformer.layers.13.feed_forward.w3
+vision_encoder.transformer.layers.14
+vision_encoder.transformer.layers.14.attention
+vision_encoder.transformer.layers.14.attention.wq
+vision_encoder.transformer.layers.14.attention.wk
+vision_encoder.transformer.layers.14.attention.wv
+vision_encoder.transformer.layers.14.attention.wo
+vision_encoder.transformer.layers.14.attention_norm
+vision_encoder.transformer.layers.14.ffn_norm
+vision_encoder.transformer.layers.14.feed_forward
+vision_encoder.transformer.layers.14.feed_forward.w1
+vision_encoder.transformer.layers.14.feed_forward.w2
+vision_encoder.transformer.layers.14.feed_forward.w3
+vision_encoder.transformer.layers.15
+vision_encoder.transformer.layers.15.attention
+vision_encoder.transformer.layers.15.attention.wq
+vision_encoder.transformer.layers.15.attention.wk
+vision_encoder.transformer.layers.15.attention.wv
+vision_encoder.transformer.layers.15.attention.wo
+vision_encoder.transformer.layers.15.attention_norm
+vision_encoder.transformer.layers.15.ffn_norm
+vision_encoder.transformer.layers.15.feed_forward
+vision_encoder.transformer.layers.15.feed_forward.w1
+vision_encoder.transformer.layers.15.feed_forward.w2
+vision_encoder.transformer.layers.15.feed_forward.w3
+vision_encoder.transformer.layers.16
+vision_encoder.transformer.layers.16.attention
+vision_encoder.transformer.layers.16.attention.wq
+vision_encoder.transformer.layers.16.attention.wk
+vision_encoder.transformer.layers.16.attention.wv
+vision_encoder.transformer.layers.16.attention.wo
+vision_encoder.transformer.layers.16.attention_norm
+vision_encoder.transformer.layers.16.ffn_norm
+vision_encoder.transformer.layers.16.feed_forward
+vision_encoder.transformer.layers.16.feed_forward.w1
+vision_encoder.transformer.layers.16.feed_forward.w2
+vision_encoder.transformer.layers.16.feed_forward.w3
+vision_encoder.transformer.layers.17
+vision_encoder.transformer.layers.17.attention
+vision_encoder.transformer.layers.17.attention.wq
+vision_encoder.transformer.layers.17.attention.wk
+vision_encoder.transformer.layers.17.attention.wv
+vision_encoder.transformer.layers.17.attention.wo
+vision_encoder.transformer.layers.17.attention_norm
+vision_encoder.transformer.layers.17.ffn_norm
+vision_encoder.transformer.layers.17.feed_forward
+vision_encoder.transformer.layers.17.feed_forward.w1
+vision_encoder.transformer.layers.17.feed_forward.w2
+vision_encoder.transformer.layers.17.feed_forward.w3
+vision_encoder.transformer.layers.18
+vision_encoder.transformer.layers.18.attention
+vision_encoder.transformer.layers.18.attention.wq
+vision_encoder.transformer.layers.18.attention.wk
+vision_encoder.transformer.layers.18.attention.wv
+vision_encoder.transformer.layers.18.attention.wo
+vision_encoder.transformer.layers.18.attention_norm
+vision_encoder.transformer.layers.18.ffn_norm
+vision_encoder.transformer.layers.18.feed_forward
+vision_encoder.transformer.layers.18.feed_forward.w1
+vision_encoder.transformer.layers.18.feed_forward.w2
+vision_encoder.transformer.layers.18.feed_forward.w3
+vision_encoder.transformer.layers.19
+vision_encoder.transformer.layers.19.attention
+vision_encoder.transformer.layers.19.attention.wq
+vision_encoder.transformer.layers.19.attention.wk
+vision_encoder.transformer.layers.19.attention.wv
+vision_encoder.transformer.layers.19.attention.wo
+vision_encoder.transformer.layers.19.attention_norm
+vision_encoder.transformer.layers.19.ffn_norm
+vision_encoder.transformer.layers.19.feed_forward
+vision_encoder.transformer.layers.19.feed_forward.w1
+vision_encoder.transformer.layers.19.feed_forward.w2
+vision_encoder.transformer.layers.19.feed_forward.w3
+vision_encoder.transformer.layers.20
+vision_encoder.transformer.layers.20.attention
+vision_encoder.transformer.layers.20.attention.wq
+vision_encoder.transformer.layers.20.attention.wk
+vision_encoder.transformer.layers.20.attention.wv
+vision_encoder.transformer.layers.20.attention.wo
+vision_encoder.transformer.layers.20.attention_norm
+vision_encoder.transformer.layers.20.ffn_norm
+vision_encoder.transformer.layers.20.feed_forward
+vision_encoder.transformer.layers.20.feed_forward.w1
+vision_encoder.transformer.layers.20.feed_forward.w2
+vision_encoder.transformer.layers.20.feed_forward.w3
+vision_encoder.transformer.layers.21
+vision_encoder.transformer.layers.21.attention
+vision_encoder.transformer.layers.21.attention.wq
+vision_encoder.transformer.layers.21.attention.wk
+vision_encoder.transformer.layers.21.attention.wv
+vision_encoder.transformer.layers.21.attention.wo
+vision_encoder.transformer.layers.21.attention_norm
+vision_encoder.transformer.layers.21.ffn_norm
+vision_encoder.transformer.layers.21.feed_forward
+vision_encoder.transformer.layers.21.feed_forward.w1
+vision_encoder.transformer.layers.21.feed_forward.w2
+vision_encoder.transformer.layers.21.feed_forward.w3
+vision_encoder.transformer.layers.22
+vision_encoder.transformer.layers.22.attention
+vision_encoder.transformer.layers.22.attention.wq
+vision_encoder.transformer.layers.22.attention.wk
+vision_encoder.transformer.layers.22.attention.wv
+vision_encoder.transformer.layers.22.attention.wo
+vision_encoder.transformer.layers.22.attention_norm
+vision_encoder.transformer.layers.22.ffn_norm
+vision_encoder.transformer.layers.22.feed_forward
+vision_encoder.transformer.layers.22.feed_forward.w1
+vision_encoder.transformer.layers.22.feed_forward.w2
+vision_encoder.transformer.layers.22.feed_forward.w3
+vision_encoder.transformer.layers.23
+vision_encoder.transformer.layers.23.attention
+vision_encoder.transformer.layers.23.attention.wq
+vision_encoder.transformer.layers.23.attention.wk
+vision_encoder.transformer.layers.23.attention.wv
+vision_encoder.transformer.layers.23.attention.wo
+vision_encoder.transformer.layers.23.attention_norm
+vision_encoder.transformer.layers.23.ffn_norm
+vision_encoder.transformer.layers.23.feed_forward
+vision_encoder.transformer.layers.23.feed_forward.w1
+vision_encoder.transformer.layers.23.feed_forward.w2
+vision_encoder.transformer.layers.23.feed_forward.w3
+vision_language_adapter
+vision_language_adapter.w_in
+vision_language_adapter.gelu
+vision_language_adapter.w_out
+norm
+output
+layers
+layers.0
+layers.0.attention
+layers.0.attention.wq
+layers.0.attention.wk
+layers.0.attention.wv
+layers.0.attention.wo
+layers.0.attention_norm
+layers.0.ffn_norm
+layers.0.feed_forward
+layers.0.feed_forward.w1
+layers.0.feed_forward.w2
+layers.0.feed_forward.w3
+layers.1
+layers.1.attention
+layers.1.attention.wq
+layers.1.attention.wk
+layers.1.attention.wv
+layers.1.attention.wo
+layers.1.attention_norm
+layers.1.ffn_norm
+layers.1.feed_forward
+layers.1.feed_forward.w1
+layers.1.feed_forward.w2
+layers.1.feed_forward.w3
+layers.2
+layers.2.attention
+layers.2.attention.wq
+layers.2.attention.wk
+layers.2.attention.wv
+layers.2.attention.wo
+layers.2.attention_norm
+layers.2.ffn_norm
+layers.2.feed_forward
+layers.2.feed_forward.w1
+layers.2.feed_forward.w2
+layers.2.feed_forward.w3
+layers.3
+layers.3.attention
+layers.3.attention.wq
+layers.3.attention.wk
+layers.3.attention.wv
+layers.3.attention.wo
+layers.3.attention_norm
+layers.3.ffn_norm
+layers.3.feed_forward
+layers.3.feed_forward.w1
+layers.3.feed_forward.w2
+layers.3.feed_forward.w3
+layers.4
+layers.4.attention
+layers.4.attention.wq
+layers.4.attention.wk
+layers.4.attention.wv
+layers.4.attention.wo
+layers.4.attention_norm
+layers.4.ffn_norm
+layers.4.feed_forward
+layers.4.feed_forward.w1
+layers.4.feed_forward.w2
+layers.4.feed_forward.w3
+layers.5
+layers.5.attention
+layers.5.attention.wq
+layers.5.attention.wk
+layers.5.attention.wv
+layers.5.attention.wo
+layers.5.attention_norm
+layers.5.ffn_norm
+layers.5.feed_forward
+layers.5.feed_forward.w1
+layers.5.feed_forward.w2
+layers.5.feed_forward.w3
+layers.6
+layers.6.attention
+layers.6.attention.wq
+layers.6.attention.wk
+layers.6.attention.wv
+layers.6.attention.wo
+layers.6.attention_norm
+layers.6.ffn_norm
+layers.6.feed_forward
+layers.6.feed_forward.w1
+layers.6.feed_forward.w2
+layers.6.feed_forward.w3
+layers.7
+layers.7.attention
+layers.7.attention.wq
+layers.7.attention.wk
+layers.7.attention.wv
+layers.7.attention.wo
+layers.7.attention_norm
+layers.7.ffn_norm
+layers.7.feed_forward
+layers.7.feed_forward.w1
+layers.7.feed_forward.w2
+layers.7.feed_forward.w3
+layers.8
+layers.8.attention
+layers.8.attention.wq
+layers.8.attention.wk
+layers.8.attention.wv
+layers.8.attention.wo
+layers.8.attention_norm
+layers.8.ffn_norm
+layers.8.feed_forward
+layers.8.feed_forward.w1
+layers.8.feed_forward.w2
+layers.8.feed_forward.w3
+layers.9
+layers.9.attention
+layers.9.attention.wq
+layers.9.attention.wk
+layers.9.attention.wv
+layers.9.attention.wo
+layers.9.attention_norm
+layers.9.ffn_norm
+layers.9.feed_forward
+layers.9.feed_forward.w1
+layers.9.feed_forward.w2
+layers.9.feed_forward.w3
+layers.10
+layers.10.attention
+layers.10.attention.wq
+layers.10.attention.wk
+layers.10.attention.wv
+layers.10.attention.wo
+layers.10.attention_norm
+layers.10.ffn_norm
+layers.10.feed_forward
+layers.10.feed_forward.w1
+layers.10.feed_forward.w2
+layers.10.feed_forward.w3
+layers.11
+layers.11.attention
+layers.11.attention.wq
+layers.11.attention.wk
+layers.11.attention.wv
+layers.11.attention.wo
+layers.11.attention_norm
+layers.11.ffn_norm
+layers.11.feed_forward
+layers.11.feed_forward.w1
+layers.11.feed_forward.w2
+layers.11.feed_forward.w3
+layers.12
+layers.12.attention
+layers.12.attention.wq
+layers.12.attention.wk
+layers.12.attention.wv
+layers.12.attention.wo
+layers.12.attention_norm
+layers.12.ffn_norm
+layers.12.feed_forward
+layers.12.feed_forward.w1
+layers.12.feed_forward.w2
+layers.12.feed_forward.w3
+layers.13
+layers.13.attention
+layers.13.attention.wq
+layers.13.attention.wk
+layers.13.attention.wv
+layers.13.attention.wo
+layers.13.attention_norm
+layers.13.ffn_norm
+layers.13.feed_forward
+layers.13.feed_forward.w1
+layers.13.feed_forward.w2
+layers.13.feed_forward.w3
+layers.14
+layers.14.attention
+layers.14.attention.wq
+layers.14.attention.wk
+layers.14.attention.wv
+layers.14.attention.wo
+layers.14.attention_norm
+layers.14.ffn_norm
+layers.14.feed_forward
+layers.14.feed_forward.w1
+layers.14.feed_forward.w2
+layers.14.feed_forward.w3
+layers.15
+layers.15.attention
+layers.15.attention.wq
+layers.15.attention.wk
+layers.15.attention.wv
+layers.15.attention.wo
+layers.15.attention_norm
+layers.15.ffn_norm
+layers.15.feed_forward
+layers.15.feed_forward.w1
+layers.15.feed_forward.w2
+layers.15.feed_forward.w3
+layers.16
+layers.16.attention
+layers.16.attention.wq
+layers.16.attention.wk
+layers.16.attention.wv
+layers.16.attention.wo
+layers.16.attention_norm
+layers.16.ffn_norm
+layers.16.feed_forward
+layers.16.feed_forward.w1
+layers.16.feed_forward.w2
+layers.16.feed_forward.w3
+layers.17
+layers.17.attention
+layers.17.attention.wq
+layers.17.attention.wk
+layers.17.attention.wv
+layers.17.attention.wo
+layers.17.attention_norm
+layers.17.ffn_norm
+layers.17.feed_forward
+layers.17.feed_forward.w1
+layers.17.feed_forward.w2
+layers.17.feed_forward.w3
+layers.18
+layers.18.attention
+layers.18.attention.wq
+layers.18.attention.wk
+layers.18.attention.wv
+layers.18.attention.wo
+layers.18.attention_norm
+layers.18.ffn_norm
+layers.18.feed_forward
+layers.18.feed_forward.w1
+layers.18.feed_forward.w2
+layers.18.feed_forward.w3
+layers.19
+layers.19.attention
+layers.19.attention.wq
+layers.19.attention.wk
+layers.19.attention.wv
+layers.19.attention.wo
+layers.19.attention_norm
+layers.19.ffn_norm
+layers.19.feed_forward
+layers.19.feed_forward.w1
+layers.19.feed_forward.w2
+layers.19.feed_forward.w3
+layers.20
+layers.20.attention
+layers.20.attention.wq
+layers.20.attention.wk
+layers.20.attention.wv
+layers.20.attention.wo
+layers.20.attention_norm
+layers.20.ffn_norm
+layers.20.feed_forward
+layers.20.feed_forward.w1
+layers.20.feed_forward.w2
+layers.20.feed_forward.w3
+layers.21
+layers.21.attention
+layers.21.attention.wq
+layers.21.attention.wk
+layers.21.attention.wv
+layers.21.attention.wo
+layers.21.attention_norm
+layers.21.ffn_norm
+layers.21.feed_forward
+layers.21.feed_forward.w1
+layers.21.feed_forward.w2
+layers.21.feed_forward.w3
+layers.22
+layers.22.attention
+layers.22.attention.wq
+layers.22.attention.wk
+layers.22.attention.wv
+layers.22.attention.wo
+layers.22.attention_norm
+layers.22.ffn_norm
+layers.22.feed_forward
+layers.22.feed_forward.w1
+layers.22.feed_forward.w2
+layers.22.feed_forward.w3
+layers.23
+layers.23.attention
+layers.23.attention.wq
+layers.23.attention.wk
+layers.23.attention.wv
+layers.23.attention.wo
+layers.23.attention_norm
+layers.23.ffn_norm
+layers.23.feed_forward
+layers.23.feed_forward.w1
+layers.23.feed_forward.w2
+layers.23.feed_forward.w3
+layers.24
+layers.24.attention
+layers.24.attention.wq
+layers.24.attention.wk
+layers.24.attention.wv
+layers.24.attention.wo
+layers.24.attention_norm
+layers.24.ffn_norm
+layers.24.feed_forward
+layers.24.feed_forward.w1
+layers.24.feed_forward.w2
+layers.24.feed_forward.w3
+layers.25
+layers.25.attention
+layers.25.attention.wq
+layers.25.attention.wk
+layers.25.attention.wv
+layers.25.attention.wo
+layers.25.attention_norm
+layers.25.ffn_norm
+layers.25.feed_forward
+layers.25.feed_forward.w1
+layers.25.feed_forward.w2
+layers.25.feed_forward.w3
+layers.26
+layers.26.attention
+layers.26.attention.wq
+layers.26.attention.wk
+layers.26.attention.wv
+layers.26.attention.wo
+layers.26.attention_norm
+layers.26.ffn_norm
+layers.26.feed_forward
+layers.26.feed_forward.w1
+layers.26.feed_forward.w2
+layers.26.feed_forward.w3
+layers.27
+layers.27.attention
+layers.27.attention.wq
+layers.27.attention.wk
+layers.27.attention.wv
+layers.27.attention.wo
+layers.27.attention_norm
+layers.27.ffn_norm
+layers.27.feed_forward
+layers.27.feed_forward.w1
+layers.27.feed_forward.w2
+layers.27.feed_forward.w3
+layers.28
+layers.28.attention
+layers.28.attention.wq
+layers.28.attention.wk
+layers.28.attention.wv
+layers.28.attention.wo
+layers.28.attention_norm
+layers.28.ffn_norm
+layers.28.feed_forward
+layers.28.feed_forward.w1
+layers.28.feed_forward.w2
+layers.28.feed_forward.w3
+layers.29
+layers.29.attention
+layers.29.attention.wq
+layers.29.attention.wk
+layers.29.attention.wv
+layers.29.attention.wo
+layers.29.attention_norm
+layers.29.ffn_norm
+layers.29.feed_forward
+layers.29.feed_forward.w1
+layers.29.feed_forward.w2
+layers.29.feed_forward.w3
+layers.30
+layers.30.attention
+layers.30.attention.wq
+layers.30.attention.wk
+layers.30.attention.wv
+layers.30.attention.wo
+layers.30.attention_norm
+layers.30.ffn_norm
+layers.30.feed_forward
+layers.30.feed_forward.w1
+layers.30.feed_forward.w2
+layers.30.feed_forward.w3
+layers.31
+layers.31.attention
+layers.31.attention.wq
+layers.31.attention.wk
+layers.31.attention.wv
+layers.31.attention.wo
+layers.31.attention_norm
+layers.31.ffn_norm
+layers.31.feed_forward
+layers.31.feed_forward.w1
+layers.31.feed_forward.w2
+layers.31.feed_forward.w3
+layers.32
+layers.32.attention
+layers.32.attention.wq
+layers.32.attention.wk
+layers.32.attention.wv
+layers.32.attention.wo
+layers.32.attention_norm
+layers.32.ffn_norm
+layers.32.feed_forward
+layers.32.feed_forward.w1
+layers.32.feed_forward.w2
+layers.32.feed_forward.w3
+layers.33
+layers.33.attention
+layers.33.attention.wq
+layers.33.attention.wk
+layers.33.attention.wv
+layers.33.attention.wo
+layers.33.attention_norm
+layers.33.ffn_norm
+layers.33.feed_forward
+layers.33.feed_forward.w1
+layers.33.feed_forward.w2
+layers.33.feed_forward.w3
+layers.34
+layers.34.attention
+layers.34.attention.wq
+layers.34.attention.wk
+layers.34.attention.wv
+layers.34.attention.wo
+layers.34.attention_norm
+layers.34.ffn_norm
+layers.34.feed_forward
+layers.34.feed_forward.w1
+layers.34.feed_forward.w2
+layers.34.feed_forward.w3
+layers.35
+layers.35.attention
+layers.35.attention.wq
+layers.35.attention.wk
+layers.35.attention.wv
+layers.35.attention.wo
+layers.35.attention_norm
+layers.35.ffn_norm
+layers.35.feed_forward
+layers.35.feed_forward.w1
+layers.35.feed_forward.w2
+layers.35.feed_forward.w3
+layers.36
+layers.36.attention
+layers.36.attention.wq
+layers.36.attention.wk
+layers.36.attention.wv
+layers.36.attention.wo
+layers.36.attention_norm
+layers.36.ffn_norm
+layers.36.feed_forward
+layers.36.feed_forward.w1
+layers.36.feed_forward.w2
+layers.36.feed_forward.w3
+layers.37
+layers.37.attention
+layers.37.attention.wq
+layers.37.attention.wk
+layers.37.attention.wv
+layers.37.attention.wo
+layers.37.attention_norm
+layers.37.ffn_norm
+layers.37.feed_forward
+layers.37.feed_forward.w1
+layers.37.feed_forward.w2
+layers.37.feed_forward.w3
+layers.38
+layers.38.attention
+layers.38.attention.wq
+layers.38.attention.wk
+layers.38.attention.wv
+layers.38.attention.wo
+layers.38.attention_norm
+layers.38.ffn_norm
+layers.38.feed_forward
+layers.38.feed_forward.w1
+layers.38.feed_forward.w2
+layers.38.feed_forward.w3
+layers.39
+layers.39.attention
+layers.39.attention.wq
+layers.39.attention.wk
+layers.39.attention.wv
+layers.39.attention.wo
+layers.39.attention_norm
+layers.39.ffn_norm
+layers.39.feed_forward
+layers.39.feed_forward.w1
+layers.39.feed_forward.w2
+layers.39.feed_forward.w3
diff --git a/logs/openai/clip-vit-base-patch32.txt b/logs/openai/clip-vit-base-patch32.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7d7f5b3e805eb9b30b7a9368805b8540165dd1bf
--- /dev/null
+++ b/logs/openai/clip-vit-base-patch32.txt
@@ -0,0 +1,306 @@
+
+text_model
+text_model.embeddings
+text_model.embeddings.token_embedding
+text_model.embeddings.position_embedding
+text_model.encoder
+text_model.encoder.layers
+text_model.encoder.layers.0
+text_model.encoder.layers.0.self_attn
+text_model.encoder.layers.0.self_attn.k_proj
+text_model.encoder.layers.0.self_attn.v_proj
+text_model.encoder.layers.0.self_attn.q_proj
+text_model.encoder.layers.0.self_attn.out_proj
+text_model.encoder.layers.0.layer_norm1
+text_model.encoder.layers.0.mlp
+text_model.encoder.layers.0.mlp.activation_fn
+text_model.encoder.layers.0.mlp.fc1
+text_model.encoder.layers.0.mlp.fc2
+text_model.encoder.layers.0.layer_norm2
+text_model.encoder.layers.1
+text_model.encoder.layers.1.self_attn
+text_model.encoder.layers.1.self_attn.k_proj
+text_model.encoder.layers.1.self_attn.v_proj
+text_model.encoder.layers.1.self_attn.q_proj
+text_model.encoder.layers.1.self_attn.out_proj
+text_model.encoder.layers.1.layer_norm1
+text_model.encoder.layers.1.mlp
+text_model.encoder.layers.1.mlp.activation_fn
+text_model.encoder.layers.1.mlp.fc1
+text_model.encoder.layers.1.mlp.fc2
+text_model.encoder.layers.1.layer_norm2
+text_model.encoder.layers.2
+text_model.encoder.layers.2.self_attn
+text_model.encoder.layers.2.self_attn.k_proj
+text_model.encoder.layers.2.self_attn.v_proj
+text_model.encoder.layers.2.self_attn.q_proj
+text_model.encoder.layers.2.self_attn.out_proj
+text_model.encoder.layers.2.layer_norm1
+text_model.encoder.layers.2.mlp
+text_model.encoder.layers.2.mlp.activation_fn
+text_model.encoder.layers.2.mlp.fc1
+text_model.encoder.layers.2.mlp.fc2
+text_model.encoder.layers.2.layer_norm2
+text_model.encoder.layers.3
+text_model.encoder.layers.3.self_attn
+text_model.encoder.layers.3.self_attn.k_proj
+text_model.encoder.layers.3.self_attn.v_proj
+text_model.encoder.layers.3.self_attn.q_proj
+text_model.encoder.layers.3.self_attn.out_proj
+text_model.encoder.layers.3.layer_norm1
+text_model.encoder.layers.3.mlp
+text_model.encoder.layers.3.mlp.activation_fn
+text_model.encoder.layers.3.mlp.fc1
+text_model.encoder.layers.3.mlp.fc2
+text_model.encoder.layers.3.layer_norm2
+text_model.encoder.layers.4
+text_model.encoder.layers.4.self_attn
+text_model.encoder.layers.4.self_attn.k_proj
+text_model.encoder.layers.4.self_attn.v_proj
+text_model.encoder.layers.4.self_attn.q_proj
+text_model.encoder.layers.4.self_attn.out_proj
+text_model.encoder.layers.4.layer_norm1
+text_model.encoder.layers.4.mlp
+text_model.encoder.layers.4.mlp.activation_fn
+text_model.encoder.layers.4.mlp.fc1
+text_model.encoder.layers.4.mlp.fc2
+text_model.encoder.layers.4.layer_norm2
+text_model.encoder.layers.5
+text_model.encoder.layers.5.self_attn
+text_model.encoder.layers.5.self_attn.k_proj
+text_model.encoder.layers.5.self_attn.v_proj
+text_model.encoder.layers.5.self_attn.q_proj
+text_model.encoder.layers.5.self_attn.out_proj
+text_model.encoder.layers.5.layer_norm1
+text_model.encoder.layers.5.mlp
+text_model.encoder.layers.5.mlp.activation_fn
+text_model.encoder.layers.5.mlp.fc1
+text_model.encoder.layers.5.mlp.fc2
+text_model.encoder.layers.5.layer_norm2
+text_model.encoder.layers.6
+text_model.encoder.layers.6.self_attn
+text_model.encoder.layers.6.self_attn.k_proj
+text_model.encoder.layers.6.self_attn.v_proj
+text_model.encoder.layers.6.self_attn.q_proj
+text_model.encoder.layers.6.self_attn.out_proj
+text_model.encoder.layers.6.layer_norm1
+text_model.encoder.layers.6.mlp
+text_model.encoder.layers.6.mlp.activation_fn
+text_model.encoder.layers.6.mlp.fc1
+text_model.encoder.layers.6.mlp.fc2
+text_model.encoder.layers.6.layer_norm2
+text_model.encoder.layers.7
+text_model.encoder.layers.7.self_attn
+text_model.encoder.layers.7.self_attn.k_proj
+text_model.encoder.layers.7.self_attn.v_proj
+text_model.encoder.layers.7.self_attn.q_proj
+text_model.encoder.layers.7.self_attn.out_proj
+text_model.encoder.layers.7.layer_norm1
+text_model.encoder.layers.7.mlp
+text_model.encoder.layers.7.mlp.activation_fn
+text_model.encoder.layers.7.mlp.fc1
+text_model.encoder.layers.7.mlp.fc2
+text_model.encoder.layers.7.layer_norm2
+text_model.encoder.layers.8
+text_model.encoder.layers.8.self_attn
+text_model.encoder.layers.8.self_attn.k_proj
+text_model.encoder.layers.8.self_attn.v_proj
+text_model.encoder.layers.8.self_attn.q_proj
+text_model.encoder.layers.8.self_attn.out_proj
+text_model.encoder.layers.8.layer_norm1
+text_model.encoder.layers.8.mlp
+text_model.encoder.layers.8.mlp.activation_fn
+text_model.encoder.layers.8.mlp.fc1
+text_model.encoder.layers.8.mlp.fc2
+text_model.encoder.layers.8.layer_norm2
+text_model.encoder.layers.9
+text_model.encoder.layers.9.self_attn
+text_model.encoder.layers.9.self_attn.k_proj
+text_model.encoder.layers.9.self_attn.v_proj
+text_model.encoder.layers.9.self_attn.q_proj
+text_model.encoder.layers.9.self_attn.out_proj
+text_model.encoder.layers.9.layer_norm1
+text_model.encoder.layers.9.mlp
+text_model.encoder.layers.9.mlp.activation_fn
+text_model.encoder.layers.9.mlp.fc1
+text_model.encoder.layers.9.mlp.fc2
+text_model.encoder.layers.9.layer_norm2
+text_model.encoder.layers.10
+text_model.encoder.layers.10.self_attn
+text_model.encoder.layers.10.self_attn.k_proj
+text_model.encoder.layers.10.self_attn.v_proj
+text_model.encoder.layers.10.self_attn.q_proj
+text_model.encoder.layers.10.self_attn.out_proj
+text_model.encoder.layers.10.layer_norm1
+text_model.encoder.layers.10.mlp
+text_model.encoder.layers.10.mlp.activation_fn
+text_model.encoder.layers.10.mlp.fc1
+text_model.encoder.layers.10.mlp.fc2
+text_model.encoder.layers.10.layer_norm2
+text_model.encoder.layers.11
+text_model.encoder.layers.11.self_attn
+text_model.encoder.layers.11.self_attn.k_proj
+text_model.encoder.layers.11.self_attn.v_proj
+text_model.encoder.layers.11.self_attn.q_proj
+text_model.encoder.layers.11.self_attn.out_proj
+text_model.encoder.layers.11.layer_norm1
+text_model.encoder.layers.11.mlp
+text_model.encoder.layers.11.mlp.activation_fn
+text_model.encoder.layers.11.mlp.fc1
+text_model.encoder.layers.11.mlp.fc2
+text_model.encoder.layers.11.layer_norm2
+text_model.final_layer_norm
+vision_model
+vision_model.embeddings
+vision_model.embeddings.patch_embedding
+vision_model.embeddings.position_embedding
+vision_model.pre_layrnorm
+vision_model.encoder
+vision_model.encoder.layers
+vision_model.encoder.layers.0
+vision_model.encoder.layers.0.self_attn
+vision_model.encoder.layers.0.self_attn.k_proj
+vision_model.encoder.layers.0.self_attn.v_proj
+vision_model.encoder.layers.0.self_attn.q_proj
+vision_model.encoder.layers.0.self_attn.out_proj
+vision_model.encoder.layers.0.layer_norm1
+vision_model.encoder.layers.0.mlp
+vision_model.encoder.layers.0.mlp.activation_fn
+vision_model.encoder.layers.0.mlp.fc1
+vision_model.encoder.layers.0.mlp.fc2
+vision_model.encoder.layers.0.layer_norm2
+vision_model.encoder.layers.1
+vision_model.encoder.layers.1.self_attn
+vision_model.encoder.layers.1.self_attn.k_proj
+vision_model.encoder.layers.1.self_attn.v_proj
+vision_model.encoder.layers.1.self_attn.q_proj
+vision_model.encoder.layers.1.self_attn.out_proj
+vision_model.encoder.layers.1.layer_norm1
+vision_model.encoder.layers.1.mlp
+vision_model.encoder.layers.1.mlp.activation_fn
+vision_model.encoder.layers.1.mlp.fc1
+vision_model.encoder.layers.1.mlp.fc2
+vision_model.encoder.layers.1.layer_norm2
+vision_model.encoder.layers.2
+vision_model.encoder.layers.2.self_attn
+vision_model.encoder.layers.2.self_attn.k_proj
+vision_model.encoder.layers.2.self_attn.v_proj
+vision_model.encoder.layers.2.self_attn.q_proj
+vision_model.encoder.layers.2.self_attn.out_proj
+vision_model.encoder.layers.2.layer_norm1
+vision_model.encoder.layers.2.mlp
+vision_model.encoder.layers.2.mlp.activation_fn
+vision_model.encoder.layers.2.mlp.fc1
+vision_model.encoder.layers.2.mlp.fc2
+vision_model.encoder.layers.2.layer_norm2
+vision_model.encoder.layers.3
+vision_model.encoder.layers.3.self_attn
+vision_model.encoder.layers.3.self_attn.k_proj
+vision_model.encoder.layers.3.self_attn.v_proj
+vision_model.encoder.layers.3.self_attn.q_proj
+vision_model.encoder.layers.3.self_attn.out_proj
+vision_model.encoder.layers.3.layer_norm1
+vision_model.encoder.layers.3.mlp
+vision_model.encoder.layers.3.mlp.activation_fn
+vision_model.encoder.layers.3.mlp.fc1
+vision_model.encoder.layers.3.mlp.fc2
+vision_model.encoder.layers.3.layer_norm2
+vision_model.encoder.layers.4
+vision_model.encoder.layers.4.self_attn
+vision_model.encoder.layers.4.self_attn.k_proj
+vision_model.encoder.layers.4.self_attn.v_proj
+vision_model.encoder.layers.4.self_attn.q_proj
+vision_model.encoder.layers.4.self_attn.out_proj
+vision_model.encoder.layers.4.layer_norm1
+vision_model.encoder.layers.4.mlp
+vision_model.encoder.layers.4.mlp.activation_fn
+vision_model.encoder.layers.4.mlp.fc1
+vision_model.encoder.layers.4.mlp.fc2
+vision_model.encoder.layers.4.layer_norm2
+vision_model.encoder.layers.5
+vision_model.encoder.layers.5.self_attn
+vision_model.encoder.layers.5.self_attn.k_proj
+vision_model.encoder.layers.5.self_attn.v_proj
+vision_model.encoder.layers.5.self_attn.q_proj
+vision_model.encoder.layers.5.self_attn.out_proj
+vision_model.encoder.layers.5.layer_norm1
+vision_model.encoder.layers.5.mlp
+vision_model.encoder.layers.5.mlp.activation_fn
+vision_model.encoder.layers.5.mlp.fc1
+vision_model.encoder.layers.5.mlp.fc2
+vision_model.encoder.layers.5.layer_norm2
+vision_model.encoder.layers.6
+vision_model.encoder.layers.6.self_attn
+vision_model.encoder.layers.6.self_attn.k_proj
+vision_model.encoder.layers.6.self_attn.v_proj
+vision_model.encoder.layers.6.self_attn.q_proj
+vision_model.encoder.layers.6.self_attn.out_proj
+vision_model.encoder.layers.6.layer_norm1
+vision_model.encoder.layers.6.mlp
+vision_model.encoder.layers.6.mlp.activation_fn
+vision_model.encoder.layers.6.mlp.fc1
+vision_model.encoder.layers.6.mlp.fc2
+vision_model.encoder.layers.6.layer_norm2
+vision_model.encoder.layers.7
+vision_model.encoder.layers.7.self_attn
+vision_model.encoder.layers.7.self_attn.k_proj
+vision_model.encoder.layers.7.self_attn.v_proj
+vision_model.encoder.layers.7.self_attn.q_proj
+vision_model.encoder.layers.7.self_attn.out_proj
+vision_model.encoder.layers.7.layer_norm1
+vision_model.encoder.layers.7.mlp
+vision_model.encoder.layers.7.mlp.activation_fn
+vision_model.encoder.layers.7.mlp.fc1
+vision_model.encoder.layers.7.mlp.fc2
+vision_model.encoder.layers.7.layer_norm2
+vision_model.encoder.layers.8
+vision_model.encoder.layers.8.self_attn
+vision_model.encoder.layers.8.self_attn.k_proj
+vision_model.encoder.layers.8.self_attn.v_proj
+vision_model.encoder.layers.8.self_attn.q_proj
+vision_model.encoder.layers.8.self_attn.out_proj
+vision_model.encoder.layers.8.layer_norm1
+vision_model.encoder.layers.8.mlp
+vision_model.encoder.layers.8.mlp.activation_fn
+vision_model.encoder.layers.8.mlp.fc1
+vision_model.encoder.layers.8.mlp.fc2
+vision_model.encoder.layers.8.layer_norm2
+vision_model.encoder.layers.9
+vision_model.encoder.layers.9.self_attn
+vision_model.encoder.layers.9.self_attn.k_proj
+vision_model.encoder.layers.9.self_attn.v_proj
+vision_model.encoder.layers.9.self_attn.q_proj
+vision_model.encoder.layers.9.self_attn.out_proj
+vision_model.encoder.layers.9.layer_norm1
+vision_model.encoder.layers.9.mlp
+vision_model.encoder.layers.9.mlp.activation_fn
+vision_model.encoder.layers.9.mlp.fc1
+vision_model.encoder.layers.9.mlp.fc2
+vision_model.encoder.layers.9.layer_norm2
+vision_model.encoder.layers.10
+vision_model.encoder.layers.10.self_attn
+vision_model.encoder.layers.10.self_attn.k_proj
+vision_model.encoder.layers.10.self_attn.v_proj
+vision_model.encoder.layers.10.self_attn.q_proj
+vision_model.encoder.layers.10.self_attn.out_proj
+vision_model.encoder.layers.10.layer_norm1
+vision_model.encoder.layers.10.mlp
+vision_model.encoder.layers.10.mlp.activation_fn
+vision_model.encoder.layers.10.mlp.fc1
+vision_model.encoder.layers.10.mlp.fc2
+vision_model.encoder.layers.10.layer_norm2
+vision_model.encoder.layers.11
+vision_model.encoder.layers.11.self_attn
+vision_model.encoder.layers.11.self_attn.k_proj
+vision_model.encoder.layers.11.self_attn.v_proj
+vision_model.encoder.layers.11.self_attn.q_proj
+vision_model.encoder.layers.11.self_attn.out_proj
+vision_model.encoder.layers.11.layer_norm1
+vision_model.encoder.layers.11.mlp
+vision_model.encoder.layers.11.mlp.activation_fn
+vision_model.encoder.layers.11.mlp.fc1
+vision_model.encoder.layers.11.mlp.fc2
+vision_model.encoder.layers.11.layer_norm2
+vision_model.post_layernorm
+visual_projection
+text_projection
diff --git a/logs/openbmb/MiniCPM-o-2_6.txt b/logs/openbmb/MiniCPM-o-2_6.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5e64233d9ede395479ac1e795bd4826478e0b5fd
--- /dev/null
+++ b/logs/openbmb/MiniCPM-o-2_6.txt
@@ -0,0 +1,1466 @@
+
+llm
+llm.model
+llm.model.embed_tokens
+llm.model.layers
+llm.model.layers.0
+llm.model.layers.0.self_attn
+llm.model.layers.0.self_attn.q_proj
+llm.model.layers.0.self_attn.k_proj
+llm.model.layers.0.self_attn.v_proj
+llm.model.layers.0.self_attn.o_proj
+llm.model.layers.0.mlp
+llm.model.layers.0.mlp.gate_proj
+llm.model.layers.0.mlp.up_proj
+llm.model.layers.0.mlp.down_proj
+llm.model.layers.0.mlp.act_fn
+llm.model.layers.0.input_layernorm
+llm.model.layers.0.post_attention_layernorm
+llm.model.layers.1
+llm.model.layers.1.self_attn
+llm.model.layers.1.self_attn.q_proj
+llm.model.layers.1.self_attn.k_proj
+llm.model.layers.1.self_attn.v_proj
+llm.model.layers.1.self_attn.o_proj
+llm.model.layers.1.mlp
+llm.model.layers.1.mlp.gate_proj
+llm.model.layers.1.mlp.up_proj
+llm.model.layers.1.mlp.down_proj
+llm.model.layers.1.mlp.act_fn
+llm.model.layers.1.input_layernorm
+llm.model.layers.1.post_attention_layernorm
+llm.model.layers.2
+llm.model.layers.2.self_attn
+llm.model.layers.2.self_attn.q_proj
+llm.model.layers.2.self_attn.k_proj
+llm.model.layers.2.self_attn.v_proj
+llm.model.layers.2.self_attn.o_proj
+llm.model.layers.2.mlp
+llm.model.layers.2.mlp.gate_proj
+llm.model.layers.2.mlp.up_proj
+llm.model.layers.2.mlp.down_proj
+llm.model.layers.2.mlp.act_fn
+llm.model.layers.2.input_layernorm
+llm.model.layers.2.post_attention_layernorm
+llm.model.layers.3
+llm.model.layers.3.self_attn
+llm.model.layers.3.self_attn.q_proj
+llm.model.layers.3.self_attn.k_proj
+llm.model.layers.3.self_attn.v_proj
+llm.model.layers.3.self_attn.o_proj
+llm.model.layers.3.mlp
+llm.model.layers.3.mlp.gate_proj
+llm.model.layers.3.mlp.up_proj
+llm.model.layers.3.mlp.down_proj
+llm.model.layers.3.mlp.act_fn
+llm.model.layers.3.input_layernorm
+llm.model.layers.3.post_attention_layernorm
+llm.model.layers.4
+llm.model.layers.4.self_attn
+llm.model.layers.4.self_attn.q_proj
+llm.model.layers.4.self_attn.k_proj
+llm.model.layers.4.self_attn.v_proj
+llm.model.layers.4.self_attn.o_proj
+llm.model.layers.4.mlp
+llm.model.layers.4.mlp.gate_proj
+llm.model.layers.4.mlp.up_proj
+llm.model.layers.4.mlp.down_proj
+llm.model.layers.4.mlp.act_fn
+llm.model.layers.4.input_layernorm
+llm.model.layers.4.post_attention_layernorm
+llm.model.layers.5
+llm.model.layers.5.self_attn
+llm.model.layers.5.self_attn.q_proj
+llm.model.layers.5.self_attn.k_proj
+llm.model.layers.5.self_attn.v_proj
+llm.model.layers.5.self_attn.o_proj
+llm.model.layers.5.mlp
+llm.model.layers.5.mlp.gate_proj
+llm.model.layers.5.mlp.up_proj
+llm.model.layers.5.mlp.down_proj
+llm.model.layers.5.mlp.act_fn
+llm.model.layers.5.input_layernorm
+llm.model.layers.5.post_attention_layernorm
+llm.model.layers.6
+llm.model.layers.6.self_attn
+llm.model.layers.6.self_attn.q_proj
+llm.model.layers.6.self_attn.k_proj
+llm.model.layers.6.self_attn.v_proj
+llm.model.layers.6.self_attn.o_proj
+llm.model.layers.6.mlp
+llm.model.layers.6.mlp.gate_proj
+llm.model.layers.6.mlp.up_proj
+llm.model.layers.6.mlp.down_proj
+llm.model.layers.6.mlp.act_fn
+llm.model.layers.6.input_layernorm
+llm.model.layers.6.post_attention_layernorm
+llm.model.layers.7
+llm.model.layers.7.self_attn
+llm.model.layers.7.self_attn.q_proj
+llm.model.layers.7.self_attn.k_proj
+llm.model.layers.7.self_attn.v_proj
+llm.model.layers.7.self_attn.o_proj
+llm.model.layers.7.mlp
+llm.model.layers.7.mlp.gate_proj
+llm.model.layers.7.mlp.up_proj
+llm.model.layers.7.mlp.down_proj
+llm.model.layers.7.mlp.act_fn
+llm.model.layers.7.input_layernorm
+llm.model.layers.7.post_attention_layernorm
+llm.model.layers.8
+llm.model.layers.8.self_attn
+llm.model.layers.8.self_attn.q_proj
+llm.model.layers.8.self_attn.k_proj
+llm.model.layers.8.self_attn.v_proj
+llm.model.layers.8.self_attn.o_proj
+llm.model.layers.8.mlp
+llm.model.layers.8.mlp.gate_proj
+llm.model.layers.8.mlp.up_proj
+llm.model.layers.8.mlp.down_proj
+llm.model.layers.8.mlp.act_fn
+llm.model.layers.8.input_layernorm
+llm.model.layers.8.post_attention_layernorm
+llm.model.layers.9
+llm.model.layers.9.self_attn
+llm.model.layers.9.self_attn.q_proj
+llm.model.layers.9.self_attn.k_proj
+llm.model.layers.9.self_attn.v_proj
+llm.model.layers.9.self_attn.o_proj
+llm.model.layers.9.mlp
+llm.model.layers.9.mlp.gate_proj
+llm.model.layers.9.mlp.up_proj
+llm.model.layers.9.mlp.down_proj
+llm.model.layers.9.mlp.act_fn
+llm.model.layers.9.input_layernorm
+llm.model.layers.9.post_attention_layernorm
+llm.model.layers.10
+llm.model.layers.10.self_attn
+llm.model.layers.10.self_attn.q_proj
+llm.model.layers.10.self_attn.k_proj
+llm.model.layers.10.self_attn.v_proj
+llm.model.layers.10.self_attn.o_proj
+llm.model.layers.10.mlp
+llm.model.layers.10.mlp.gate_proj
+llm.model.layers.10.mlp.up_proj
+llm.model.layers.10.mlp.down_proj
+llm.model.layers.10.mlp.act_fn
+llm.model.layers.10.input_layernorm
+llm.model.layers.10.post_attention_layernorm
+llm.model.layers.11
+llm.model.layers.11.self_attn
+llm.model.layers.11.self_attn.q_proj
+llm.model.layers.11.self_attn.k_proj
+llm.model.layers.11.self_attn.v_proj
+llm.model.layers.11.self_attn.o_proj
+llm.model.layers.11.mlp
+llm.model.layers.11.mlp.gate_proj
+llm.model.layers.11.mlp.up_proj
+llm.model.layers.11.mlp.down_proj
+llm.model.layers.11.mlp.act_fn
+llm.model.layers.11.input_layernorm
+llm.model.layers.11.post_attention_layernorm
+llm.model.layers.12
+llm.model.layers.12.self_attn
+llm.model.layers.12.self_attn.q_proj
+llm.model.layers.12.self_attn.k_proj
+llm.model.layers.12.self_attn.v_proj
+llm.model.layers.12.self_attn.o_proj
+llm.model.layers.12.mlp
+llm.model.layers.12.mlp.gate_proj
+llm.model.layers.12.mlp.up_proj
+llm.model.layers.12.mlp.down_proj
+llm.model.layers.12.mlp.act_fn
+llm.model.layers.12.input_layernorm
+llm.model.layers.12.post_attention_layernorm
+llm.model.layers.13
+llm.model.layers.13.self_attn
+llm.model.layers.13.self_attn.q_proj
+llm.model.layers.13.self_attn.k_proj
+llm.model.layers.13.self_attn.v_proj
+llm.model.layers.13.self_attn.o_proj
+llm.model.layers.13.mlp
+llm.model.layers.13.mlp.gate_proj
+llm.model.layers.13.mlp.up_proj
+llm.model.layers.13.mlp.down_proj
+llm.model.layers.13.mlp.act_fn
+llm.model.layers.13.input_layernorm
+llm.model.layers.13.post_attention_layernorm
+llm.model.layers.14
+llm.model.layers.14.self_attn
+llm.model.layers.14.self_attn.q_proj
+llm.model.layers.14.self_attn.k_proj
+llm.model.layers.14.self_attn.v_proj
+llm.model.layers.14.self_attn.o_proj
+llm.model.layers.14.mlp
+llm.model.layers.14.mlp.gate_proj
+llm.model.layers.14.mlp.up_proj
+llm.model.layers.14.mlp.down_proj
+llm.model.layers.14.mlp.act_fn
+llm.model.layers.14.input_layernorm
+llm.model.layers.14.post_attention_layernorm
+llm.model.layers.15
+llm.model.layers.15.self_attn
+llm.model.layers.15.self_attn.q_proj
+llm.model.layers.15.self_attn.k_proj
+llm.model.layers.15.self_attn.v_proj
+llm.model.layers.15.self_attn.o_proj
+llm.model.layers.15.mlp
+llm.model.layers.15.mlp.gate_proj
+llm.model.layers.15.mlp.up_proj
+llm.model.layers.15.mlp.down_proj
+llm.model.layers.15.mlp.act_fn
+llm.model.layers.15.input_layernorm
+llm.model.layers.15.post_attention_layernorm
+llm.model.layers.16
+llm.model.layers.16.self_attn
+llm.model.layers.16.self_attn.q_proj
+llm.model.layers.16.self_attn.k_proj
+llm.model.layers.16.self_attn.v_proj
+llm.model.layers.16.self_attn.o_proj
+llm.model.layers.16.mlp
+llm.model.layers.16.mlp.gate_proj
+llm.model.layers.16.mlp.up_proj
+llm.model.layers.16.mlp.down_proj
+llm.model.layers.16.mlp.act_fn
+llm.model.layers.16.input_layernorm
+llm.model.layers.16.post_attention_layernorm
+llm.model.layers.17
+llm.model.layers.17.self_attn
+llm.model.layers.17.self_attn.q_proj
+llm.model.layers.17.self_attn.k_proj
+llm.model.layers.17.self_attn.v_proj
+llm.model.layers.17.self_attn.o_proj
+llm.model.layers.17.mlp
+llm.model.layers.17.mlp.gate_proj
+llm.model.layers.17.mlp.up_proj
+llm.model.layers.17.mlp.down_proj
+llm.model.layers.17.mlp.act_fn
+llm.model.layers.17.input_layernorm
+llm.model.layers.17.post_attention_layernorm
+llm.model.layers.18
+llm.model.layers.18.self_attn
+llm.model.layers.18.self_attn.q_proj
+llm.model.layers.18.self_attn.k_proj
+llm.model.layers.18.self_attn.v_proj
+llm.model.layers.18.self_attn.o_proj
+llm.model.layers.18.mlp
+llm.model.layers.18.mlp.gate_proj
+llm.model.layers.18.mlp.up_proj
+llm.model.layers.18.mlp.down_proj
+llm.model.layers.18.mlp.act_fn
+llm.model.layers.18.input_layernorm
+llm.model.layers.18.post_attention_layernorm
+llm.model.layers.19
+llm.model.layers.19.self_attn
+llm.model.layers.19.self_attn.q_proj
+llm.model.layers.19.self_attn.k_proj
+llm.model.layers.19.self_attn.v_proj
+llm.model.layers.19.self_attn.o_proj
+llm.model.layers.19.mlp
+llm.model.layers.19.mlp.gate_proj
+llm.model.layers.19.mlp.up_proj
+llm.model.layers.19.mlp.down_proj
+llm.model.layers.19.mlp.act_fn
+llm.model.layers.19.input_layernorm
+llm.model.layers.19.post_attention_layernorm
+llm.model.layers.20
+llm.model.layers.20.self_attn
+llm.model.layers.20.self_attn.q_proj
+llm.model.layers.20.self_attn.k_proj
+llm.model.layers.20.self_attn.v_proj
+llm.model.layers.20.self_attn.o_proj
+llm.model.layers.20.mlp
+llm.model.layers.20.mlp.gate_proj
+llm.model.layers.20.mlp.up_proj
+llm.model.layers.20.mlp.down_proj
+llm.model.layers.20.mlp.act_fn
+llm.model.layers.20.input_layernorm
+llm.model.layers.20.post_attention_layernorm
+llm.model.layers.21
+llm.model.layers.21.self_attn
+llm.model.layers.21.self_attn.q_proj
+llm.model.layers.21.self_attn.k_proj
+llm.model.layers.21.self_attn.v_proj
+llm.model.layers.21.self_attn.o_proj
+llm.model.layers.21.mlp
+llm.model.layers.21.mlp.gate_proj
+llm.model.layers.21.mlp.up_proj
+llm.model.layers.21.mlp.down_proj
+llm.model.layers.21.mlp.act_fn
+llm.model.layers.21.input_layernorm
+llm.model.layers.21.post_attention_layernorm
+llm.model.layers.22
+llm.model.layers.22.self_attn
+llm.model.layers.22.self_attn.q_proj
+llm.model.layers.22.self_attn.k_proj
+llm.model.layers.22.self_attn.v_proj
+llm.model.layers.22.self_attn.o_proj
+llm.model.layers.22.mlp
+llm.model.layers.22.mlp.gate_proj
+llm.model.layers.22.mlp.up_proj
+llm.model.layers.22.mlp.down_proj
+llm.model.layers.22.mlp.act_fn
+llm.model.layers.22.input_layernorm
+llm.model.layers.22.post_attention_layernorm
+llm.model.layers.23
+llm.model.layers.23.self_attn
+llm.model.layers.23.self_attn.q_proj
+llm.model.layers.23.self_attn.k_proj
+llm.model.layers.23.self_attn.v_proj
+llm.model.layers.23.self_attn.o_proj
+llm.model.layers.23.mlp
+llm.model.layers.23.mlp.gate_proj
+llm.model.layers.23.mlp.up_proj
+llm.model.layers.23.mlp.down_proj
+llm.model.layers.23.mlp.act_fn
+llm.model.layers.23.input_layernorm
+llm.model.layers.23.post_attention_layernorm
+llm.model.layers.24
+llm.model.layers.24.self_attn
+llm.model.layers.24.self_attn.q_proj
+llm.model.layers.24.self_attn.k_proj
+llm.model.layers.24.self_attn.v_proj
+llm.model.layers.24.self_attn.o_proj
+llm.model.layers.24.mlp
+llm.model.layers.24.mlp.gate_proj
+llm.model.layers.24.mlp.up_proj
+llm.model.layers.24.mlp.down_proj
+llm.model.layers.24.mlp.act_fn
+llm.model.layers.24.input_layernorm
+llm.model.layers.24.post_attention_layernorm
+llm.model.layers.25
+llm.model.layers.25.self_attn
+llm.model.layers.25.self_attn.q_proj
+llm.model.layers.25.self_attn.k_proj
+llm.model.layers.25.self_attn.v_proj
+llm.model.layers.25.self_attn.o_proj
+llm.model.layers.25.mlp
+llm.model.layers.25.mlp.gate_proj
+llm.model.layers.25.mlp.up_proj
+llm.model.layers.25.mlp.down_proj
+llm.model.layers.25.mlp.act_fn
+llm.model.layers.25.input_layernorm
+llm.model.layers.25.post_attention_layernorm
+llm.model.layers.26
+llm.model.layers.26.self_attn
+llm.model.layers.26.self_attn.q_proj
+llm.model.layers.26.self_attn.k_proj
+llm.model.layers.26.self_attn.v_proj
+llm.model.layers.26.self_attn.o_proj
+llm.model.layers.26.mlp
+llm.model.layers.26.mlp.gate_proj
+llm.model.layers.26.mlp.up_proj
+llm.model.layers.26.mlp.down_proj
+llm.model.layers.26.mlp.act_fn
+llm.model.layers.26.input_layernorm
+llm.model.layers.26.post_attention_layernorm
+llm.model.layers.27
+llm.model.layers.27.self_attn
+llm.model.layers.27.self_attn.q_proj
+llm.model.layers.27.self_attn.k_proj
+llm.model.layers.27.self_attn.v_proj
+llm.model.layers.27.self_attn.o_proj
+llm.model.layers.27.mlp
+llm.model.layers.27.mlp.gate_proj
+llm.model.layers.27.mlp.up_proj
+llm.model.layers.27.mlp.down_proj
+llm.model.layers.27.mlp.act_fn
+llm.model.layers.27.input_layernorm
+llm.model.layers.27.post_attention_layernorm
+llm.model.norm
+llm.model.rotary_emb
+llm.lm_head
+vpm
+vpm.embeddings
+vpm.embeddings.patch_embedding
+vpm.embeddings.position_embedding
+vpm.encoder
+vpm.encoder.layers
+vpm.encoder.layers.0
+vpm.encoder.layers.0.self_attn
+vpm.encoder.layers.0.self_attn.k_proj
+vpm.encoder.layers.0.self_attn.v_proj
+vpm.encoder.layers.0.self_attn.q_proj
+vpm.encoder.layers.0.self_attn.out_proj
+vpm.encoder.layers.0.layer_norm1
+vpm.encoder.layers.0.mlp
+vpm.encoder.layers.0.mlp.activation_fn
+vpm.encoder.layers.0.mlp.fc1
+vpm.encoder.layers.0.mlp.fc2
+vpm.encoder.layers.0.layer_norm2
+vpm.encoder.layers.1
+vpm.encoder.layers.1.self_attn
+vpm.encoder.layers.1.self_attn.k_proj
+vpm.encoder.layers.1.self_attn.v_proj
+vpm.encoder.layers.1.self_attn.q_proj
+vpm.encoder.layers.1.self_attn.out_proj
+vpm.encoder.layers.1.layer_norm1
+vpm.encoder.layers.1.mlp
+vpm.encoder.layers.1.mlp.activation_fn
+vpm.encoder.layers.1.mlp.fc1
+vpm.encoder.layers.1.mlp.fc2
+vpm.encoder.layers.1.layer_norm2
+vpm.encoder.layers.2
+vpm.encoder.layers.2.self_attn
+vpm.encoder.layers.2.self_attn.k_proj
+vpm.encoder.layers.2.self_attn.v_proj
+vpm.encoder.layers.2.self_attn.q_proj
+vpm.encoder.layers.2.self_attn.out_proj
+vpm.encoder.layers.2.layer_norm1
+vpm.encoder.layers.2.mlp
+vpm.encoder.layers.2.mlp.activation_fn
+vpm.encoder.layers.2.mlp.fc1
+vpm.encoder.layers.2.mlp.fc2
+vpm.encoder.layers.2.layer_norm2
+vpm.encoder.layers.3
+vpm.encoder.layers.3.self_attn
+vpm.encoder.layers.3.self_attn.k_proj
+vpm.encoder.layers.3.self_attn.v_proj
+vpm.encoder.layers.3.self_attn.q_proj
+vpm.encoder.layers.3.self_attn.out_proj
+vpm.encoder.layers.3.layer_norm1
+vpm.encoder.layers.3.mlp
+vpm.encoder.layers.3.mlp.activation_fn
+vpm.encoder.layers.3.mlp.fc1
+vpm.encoder.layers.3.mlp.fc2
+vpm.encoder.layers.3.layer_norm2
+vpm.encoder.layers.4
+vpm.encoder.layers.4.self_attn
+vpm.encoder.layers.4.self_attn.k_proj
+vpm.encoder.layers.4.self_attn.v_proj
+vpm.encoder.layers.4.self_attn.q_proj
+vpm.encoder.layers.4.self_attn.out_proj
+vpm.encoder.layers.4.layer_norm1
+vpm.encoder.layers.4.mlp
+vpm.encoder.layers.4.mlp.activation_fn
+vpm.encoder.layers.4.mlp.fc1
+vpm.encoder.layers.4.mlp.fc2
+vpm.encoder.layers.4.layer_norm2
+vpm.encoder.layers.5
+vpm.encoder.layers.5.self_attn
+vpm.encoder.layers.5.self_attn.k_proj
+vpm.encoder.layers.5.self_attn.v_proj
+vpm.encoder.layers.5.self_attn.q_proj
+vpm.encoder.layers.5.self_attn.out_proj
+vpm.encoder.layers.5.layer_norm1
+vpm.encoder.layers.5.mlp
+vpm.encoder.layers.5.mlp.activation_fn
+vpm.encoder.layers.5.mlp.fc1
+vpm.encoder.layers.5.mlp.fc2
+vpm.encoder.layers.5.layer_norm2
+vpm.encoder.layers.6
+vpm.encoder.layers.6.self_attn
+vpm.encoder.layers.6.self_attn.k_proj
+vpm.encoder.layers.6.self_attn.v_proj
+vpm.encoder.layers.6.self_attn.q_proj
+vpm.encoder.layers.6.self_attn.out_proj
+vpm.encoder.layers.6.layer_norm1
+vpm.encoder.layers.6.mlp
+vpm.encoder.layers.6.mlp.activation_fn
+vpm.encoder.layers.6.mlp.fc1
+vpm.encoder.layers.6.mlp.fc2
+vpm.encoder.layers.6.layer_norm2
+vpm.encoder.layers.7
+vpm.encoder.layers.7.self_attn
+vpm.encoder.layers.7.self_attn.k_proj
+vpm.encoder.layers.7.self_attn.v_proj
+vpm.encoder.layers.7.self_attn.q_proj
+vpm.encoder.layers.7.self_attn.out_proj
+vpm.encoder.layers.7.layer_norm1
+vpm.encoder.layers.7.mlp
+vpm.encoder.layers.7.mlp.activation_fn
+vpm.encoder.layers.7.mlp.fc1
+vpm.encoder.layers.7.mlp.fc2
+vpm.encoder.layers.7.layer_norm2
+vpm.encoder.layers.8
+vpm.encoder.layers.8.self_attn
+vpm.encoder.layers.8.self_attn.k_proj
+vpm.encoder.layers.8.self_attn.v_proj
+vpm.encoder.layers.8.self_attn.q_proj
+vpm.encoder.layers.8.self_attn.out_proj
+vpm.encoder.layers.8.layer_norm1
+vpm.encoder.layers.8.mlp
+vpm.encoder.layers.8.mlp.activation_fn
+vpm.encoder.layers.8.mlp.fc1
+vpm.encoder.layers.8.mlp.fc2
+vpm.encoder.layers.8.layer_norm2
+vpm.encoder.layers.9
+vpm.encoder.layers.9.self_attn
+vpm.encoder.layers.9.self_attn.k_proj
+vpm.encoder.layers.9.self_attn.v_proj
+vpm.encoder.layers.9.self_attn.q_proj
+vpm.encoder.layers.9.self_attn.out_proj
+vpm.encoder.layers.9.layer_norm1
+vpm.encoder.layers.9.mlp
+vpm.encoder.layers.9.mlp.activation_fn
+vpm.encoder.layers.9.mlp.fc1
+vpm.encoder.layers.9.mlp.fc2
+vpm.encoder.layers.9.layer_norm2
+vpm.encoder.layers.10
+vpm.encoder.layers.10.self_attn
+vpm.encoder.layers.10.self_attn.k_proj
+vpm.encoder.layers.10.self_attn.v_proj
+vpm.encoder.layers.10.self_attn.q_proj
+vpm.encoder.layers.10.self_attn.out_proj
+vpm.encoder.layers.10.layer_norm1
+vpm.encoder.layers.10.mlp
+vpm.encoder.layers.10.mlp.activation_fn
+vpm.encoder.layers.10.mlp.fc1
+vpm.encoder.layers.10.mlp.fc2
+vpm.encoder.layers.10.layer_norm2
+vpm.encoder.layers.11
+vpm.encoder.layers.11.self_attn
+vpm.encoder.layers.11.self_attn.k_proj
+vpm.encoder.layers.11.self_attn.v_proj
+vpm.encoder.layers.11.self_attn.q_proj
+vpm.encoder.layers.11.self_attn.out_proj
+vpm.encoder.layers.11.layer_norm1
+vpm.encoder.layers.11.mlp
+vpm.encoder.layers.11.mlp.activation_fn
+vpm.encoder.layers.11.mlp.fc1
+vpm.encoder.layers.11.mlp.fc2
+vpm.encoder.layers.11.layer_norm2
+vpm.encoder.layers.12
+vpm.encoder.layers.12.self_attn
+vpm.encoder.layers.12.self_attn.k_proj
+vpm.encoder.layers.12.self_attn.v_proj
+vpm.encoder.layers.12.self_attn.q_proj
+vpm.encoder.layers.12.self_attn.out_proj
+vpm.encoder.layers.12.layer_norm1
+vpm.encoder.layers.12.mlp
+vpm.encoder.layers.12.mlp.activation_fn
+vpm.encoder.layers.12.mlp.fc1
+vpm.encoder.layers.12.mlp.fc2
+vpm.encoder.layers.12.layer_norm2
+vpm.encoder.layers.13
+vpm.encoder.layers.13.self_attn
+vpm.encoder.layers.13.self_attn.k_proj
+vpm.encoder.layers.13.self_attn.v_proj
+vpm.encoder.layers.13.self_attn.q_proj
+vpm.encoder.layers.13.self_attn.out_proj
+vpm.encoder.layers.13.layer_norm1
+vpm.encoder.layers.13.mlp
+vpm.encoder.layers.13.mlp.activation_fn
+vpm.encoder.layers.13.mlp.fc1
+vpm.encoder.layers.13.mlp.fc2
+vpm.encoder.layers.13.layer_norm2
+vpm.encoder.layers.14
+vpm.encoder.layers.14.self_attn
+vpm.encoder.layers.14.self_attn.k_proj
+vpm.encoder.layers.14.self_attn.v_proj
+vpm.encoder.layers.14.self_attn.q_proj
+vpm.encoder.layers.14.self_attn.out_proj
+vpm.encoder.layers.14.layer_norm1
+vpm.encoder.layers.14.mlp
+vpm.encoder.layers.14.mlp.activation_fn
+vpm.encoder.layers.14.mlp.fc1
+vpm.encoder.layers.14.mlp.fc2
+vpm.encoder.layers.14.layer_norm2
+vpm.encoder.layers.15
+vpm.encoder.layers.15.self_attn
+vpm.encoder.layers.15.self_attn.k_proj
+vpm.encoder.layers.15.self_attn.v_proj
+vpm.encoder.layers.15.self_attn.q_proj
+vpm.encoder.layers.15.self_attn.out_proj
+vpm.encoder.layers.15.layer_norm1
+vpm.encoder.layers.15.mlp
+vpm.encoder.layers.15.mlp.activation_fn
+vpm.encoder.layers.15.mlp.fc1
+vpm.encoder.layers.15.mlp.fc2
+vpm.encoder.layers.15.layer_norm2
+vpm.encoder.layers.16
+vpm.encoder.layers.16.self_attn
+vpm.encoder.layers.16.self_attn.k_proj
+vpm.encoder.layers.16.self_attn.v_proj
+vpm.encoder.layers.16.self_attn.q_proj
+vpm.encoder.layers.16.self_attn.out_proj
+vpm.encoder.layers.16.layer_norm1
+vpm.encoder.layers.16.mlp
+vpm.encoder.layers.16.mlp.activation_fn
+vpm.encoder.layers.16.mlp.fc1
+vpm.encoder.layers.16.mlp.fc2
+vpm.encoder.layers.16.layer_norm2
+vpm.encoder.layers.17
+vpm.encoder.layers.17.self_attn
+vpm.encoder.layers.17.self_attn.k_proj
+vpm.encoder.layers.17.self_attn.v_proj
+vpm.encoder.layers.17.self_attn.q_proj
+vpm.encoder.layers.17.self_attn.out_proj
+vpm.encoder.layers.17.layer_norm1
+vpm.encoder.layers.17.mlp
+vpm.encoder.layers.17.mlp.activation_fn
+vpm.encoder.layers.17.mlp.fc1
+vpm.encoder.layers.17.mlp.fc2
+vpm.encoder.layers.17.layer_norm2
+vpm.encoder.layers.18
+vpm.encoder.layers.18.self_attn
+vpm.encoder.layers.18.self_attn.k_proj
+vpm.encoder.layers.18.self_attn.v_proj
+vpm.encoder.layers.18.self_attn.q_proj
+vpm.encoder.layers.18.self_attn.out_proj
+vpm.encoder.layers.18.layer_norm1
+vpm.encoder.layers.18.mlp
+vpm.encoder.layers.18.mlp.activation_fn
+vpm.encoder.layers.18.mlp.fc1
+vpm.encoder.layers.18.mlp.fc2
+vpm.encoder.layers.18.layer_norm2
+vpm.encoder.layers.19
+vpm.encoder.layers.19.self_attn
+vpm.encoder.layers.19.self_attn.k_proj
+vpm.encoder.layers.19.self_attn.v_proj
+vpm.encoder.layers.19.self_attn.q_proj
+vpm.encoder.layers.19.self_attn.out_proj
+vpm.encoder.layers.19.layer_norm1
+vpm.encoder.layers.19.mlp
+vpm.encoder.layers.19.mlp.activation_fn
+vpm.encoder.layers.19.mlp.fc1
+vpm.encoder.layers.19.mlp.fc2
+vpm.encoder.layers.19.layer_norm2
+vpm.encoder.layers.20
+vpm.encoder.layers.20.self_attn
+vpm.encoder.layers.20.self_attn.k_proj
+vpm.encoder.layers.20.self_attn.v_proj
+vpm.encoder.layers.20.self_attn.q_proj
+vpm.encoder.layers.20.self_attn.out_proj
+vpm.encoder.layers.20.layer_norm1
+vpm.encoder.layers.20.mlp
+vpm.encoder.layers.20.mlp.activation_fn
+vpm.encoder.layers.20.mlp.fc1
+vpm.encoder.layers.20.mlp.fc2
+vpm.encoder.layers.20.layer_norm2
+vpm.encoder.layers.21
+vpm.encoder.layers.21.self_attn
+vpm.encoder.layers.21.self_attn.k_proj
+vpm.encoder.layers.21.self_attn.v_proj
+vpm.encoder.layers.21.self_attn.q_proj
+vpm.encoder.layers.21.self_attn.out_proj
+vpm.encoder.layers.21.layer_norm1
+vpm.encoder.layers.21.mlp
+vpm.encoder.layers.21.mlp.activation_fn
+vpm.encoder.layers.21.mlp.fc1
+vpm.encoder.layers.21.mlp.fc2
+vpm.encoder.layers.21.layer_norm2
+vpm.encoder.layers.22
+vpm.encoder.layers.22.self_attn
+vpm.encoder.layers.22.self_attn.k_proj
+vpm.encoder.layers.22.self_attn.v_proj
+vpm.encoder.layers.22.self_attn.q_proj
+vpm.encoder.layers.22.self_attn.out_proj
+vpm.encoder.layers.22.layer_norm1
+vpm.encoder.layers.22.mlp
+vpm.encoder.layers.22.mlp.activation_fn
+vpm.encoder.layers.22.mlp.fc1
+vpm.encoder.layers.22.mlp.fc2
+vpm.encoder.layers.22.layer_norm2
+vpm.encoder.layers.23
+vpm.encoder.layers.23.self_attn
+vpm.encoder.layers.23.self_attn.k_proj
+vpm.encoder.layers.23.self_attn.v_proj
+vpm.encoder.layers.23.self_attn.q_proj
+vpm.encoder.layers.23.self_attn.out_proj
+vpm.encoder.layers.23.layer_norm1
+vpm.encoder.layers.23.mlp
+vpm.encoder.layers.23.mlp.activation_fn
+vpm.encoder.layers.23.mlp.fc1
+vpm.encoder.layers.23.mlp.fc2
+vpm.encoder.layers.23.layer_norm2
+vpm.encoder.layers.24
+vpm.encoder.layers.24.self_attn
+vpm.encoder.layers.24.self_attn.k_proj
+vpm.encoder.layers.24.self_attn.v_proj
+vpm.encoder.layers.24.self_attn.q_proj
+vpm.encoder.layers.24.self_attn.out_proj
+vpm.encoder.layers.24.layer_norm1
+vpm.encoder.layers.24.mlp
+vpm.encoder.layers.24.mlp.activation_fn
+vpm.encoder.layers.24.mlp.fc1
+vpm.encoder.layers.24.mlp.fc2
+vpm.encoder.layers.24.layer_norm2
+vpm.encoder.layers.25
+vpm.encoder.layers.25.self_attn
+vpm.encoder.layers.25.self_attn.k_proj
+vpm.encoder.layers.25.self_attn.v_proj
+vpm.encoder.layers.25.self_attn.q_proj
+vpm.encoder.layers.25.self_attn.out_proj
+vpm.encoder.layers.25.layer_norm1
+vpm.encoder.layers.25.mlp
+vpm.encoder.layers.25.mlp.activation_fn
+vpm.encoder.layers.25.mlp.fc1
+vpm.encoder.layers.25.mlp.fc2
+vpm.encoder.layers.25.layer_norm2
+vpm.encoder.layers.26
+vpm.encoder.layers.26.self_attn
+vpm.encoder.layers.26.self_attn.k_proj
+vpm.encoder.layers.26.self_attn.v_proj
+vpm.encoder.layers.26.self_attn.q_proj
+vpm.encoder.layers.26.self_attn.out_proj
+vpm.encoder.layers.26.layer_norm1
+vpm.encoder.layers.26.mlp
+vpm.encoder.layers.26.mlp.activation_fn
+vpm.encoder.layers.26.mlp.fc1
+vpm.encoder.layers.26.mlp.fc2
+vpm.encoder.layers.26.layer_norm2
+vpm.post_layernorm
+resampler
+resampler.kv_proj
+resampler.attn
+resampler.attn.out_proj
+resampler.ln_q
+resampler.ln_kv
+resampler.ln_post
+apm
+apm.conv1
+apm.conv2
+apm.embed_positions
+apm.layers
+apm.layers.0
+apm.layers.0.self_attn
+apm.layers.0.self_attn.k_proj
+apm.layers.0.self_attn.v_proj
+apm.layers.0.self_attn.q_proj
+apm.layers.0.self_attn.out_proj
+apm.layers.0.self_attn_layer_norm
+apm.layers.0.activation_fn
+apm.layers.0.fc1
+apm.layers.0.fc2
+apm.layers.0.final_layer_norm
+apm.layers.1
+apm.layers.1.self_attn
+apm.layers.1.self_attn.k_proj
+apm.layers.1.self_attn.v_proj
+apm.layers.1.self_attn.q_proj
+apm.layers.1.self_attn.out_proj
+apm.layers.1.self_attn_layer_norm
+apm.layers.1.activation_fn
+apm.layers.1.fc1
+apm.layers.1.fc2
+apm.layers.1.final_layer_norm
+apm.layers.2
+apm.layers.2.self_attn
+apm.layers.2.self_attn.k_proj
+apm.layers.2.self_attn.v_proj
+apm.layers.2.self_attn.q_proj
+apm.layers.2.self_attn.out_proj
+apm.layers.2.self_attn_layer_norm
+apm.layers.2.activation_fn
+apm.layers.2.fc1
+apm.layers.2.fc2
+apm.layers.2.final_layer_norm
+apm.layers.3
+apm.layers.3.self_attn
+apm.layers.3.self_attn.k_proj
+apm.layers.3.self_attn.v_proj
+apm.layers.3.self_attn.q_proj
+apm.layers.3.self_attn.out_proj
+apm.layers.3.self_attn_layer_norm
+apm.layers.3.activation_fn
+apm.layers.3.fc1
+apm.layers.3.fc2
+apm.layers.3.final_layer_norm
+apm.layers.4
+apm.layers.4.self_attn
+apm.layers.4.self_attn.k_proj
+apm.layers.4.self_attn.v_proj
+apm.layers.4.self_attn.q_proj
+apm.layers.4.self_attn.out_proj
+apm.layers.4.self_attn_layer_norm
+apm.layers.4.activation_fn
+apm.layers.4.fc1
+apm.layers.4.fc2
+apm.layers.4.final_layer_norm
+apm.layers.5
+apm.layers.5.self_attn
+apm.layers.5.self_attn.k_proj
+apm.layers.5.self_attn.v_proj
+apm.layers.5.self_attn.q_proj
+apm.layers.5.self_attn.out_proj
+apm.layers.5.self_attn_layer_norm
+apm.layers.5.activation_fn
+apm.layers.5.fc1
+apm.layers.5.fc2
+apm.layers.5.final_layer_norm
+apm.layers.6
+apm.layers.6.self_attn
+apm.layers.6.self_attn.k_proj
+apm.layers.6.self_attn.v_proj
+apm.layers.6.self_attn.q_proj
+apm.layers.6.self_attn.out_proj
+apm.layers.6.self_attn_layer_norm
+apm.layers.6.activation_fn
+apm.layers.6.fc1
+apm.layers.6.fc2
+apm.layers.6.final_layer_norm
+apm.layers.7
+apm.layers.7.self_attn
+apm.layers.7.self_attn.k_proj
+apm.layers.7.self_attn.v_proj
+apm.layers.7.self_attn.q_proj
+apm.layers.7.self_attn.out_proj
+apm.layers.7.self_attn_layer_norm
+apm.layers.7.activation_fn
+apm.layers.7.fc1
+apm.layers.7.fc2
+apm.layers.7.final_layer_norm
+apm.layers.8
+apm.layers.8.self_attn
+apm.layers.8.self_attn.k_proj
+apm.layers.8.self_attn.v_proj
+apm.layers.8.self_attn.q_proj
+apm.layers.8.self_attn.out_proj
+apm.layers.8.self_attn_layer_norm
+apm.layers.8.activation_fn
+apm.layers.8.fc1
+apm.layers.8.fc2
+apm.layers.8.final_layer_norm
+apm.layers.9
+apm.layers.9.self_attn
+apm.layers.9.self_attn.k_proj
+apm.layers.9.self_attn.v_proj
+apm.layers.9.self_attn.q_proj
+apm.layers.9.self_attn.out_proj
+apm.layers.9.self_attn_layer_norm
+apm.layers.9.activation_fn
+apm.layers.9.fc1
+apm.layers.9.fc2
+apm.layers.9.final_layer_norm
+apm.layers.10
+apm.layers.10.self_attn
+apm.layers.10.self_attn.k_proj
+apm.layers.10.self_attn.v_proj
+apm.layers.10.self_attn.q_proj
+apm.layers.10.self_attn.out_proj
+apm.layers.10.self_attn_layer_norm
+apm.layers.10.activation_fn
+apm.layers.10.fc1
+apm.layers.10.fc2
+apm.layers.10.final_layer_norm
+apm.layers.11
+apm.layers.11.self_attn
+apm.layers.11.self_attn.k_proj
+apm.layers.11.self_attn.v_proj
+apm.layers.11.self_attn.q_proj
+apm.layers.11.self_attn.out_proj
+apm.layers.11.self_attn_layer_norm
+apm.layers.11.activation_fn
+apm.layers.11.fc1
+apm.layers.11.fc2
+apm.layers.11.final_layer_norm
+apm.layers.12
+apm.layers.12.self_attn
+apm.layers.12.self_attn.k_proj
+apm.layers.12.self_attn.v_proj
+apm.layers.12.self_attn.q_proj
+apm.layers.12.self_attn.out_proj
+apm.layers.12.self_attn_layer_norm
+apm.layers.12.activation_fn
+apm.layers.12.fc1
+apm.layers.12.fc2
+apm.layers.12.final_layer_norm
+apm.layers.13
+apm.layers.13.self_attn
+apm.layers.13.self_attn.k_proj
+apm.layers.13.self_attn.v_proj
+apm.layers.13.self_attn.q_proj
+apm.layers.13.self_attn.out_proj
+apm.layers.13.self_attn_layer_norm
+apm.layers.13.activation_fn
+apm.layers.13.fc1
+apm.layers.13.fc2
+apm.layers.13.final_layer_norm
+apm.layers.14
+apm.layers.14.self_attn
+apm.layers.14.self_attn.k_proj
+apm.layers.14.self_attn.v_proj
+apm.layers.14.self_attn.q_proj
+apm.layers.14.self_attn.out_proj
+apm.layers.14.self_attn_layer_norm
+apm.layers.14.activation_fn
+apm.layers.14.fc1
+apm.layers.14.fc2
+apm.layers.14.final_layer_norm
+apm.layers.15
+apm.layers.15.self_attn
+apm.layers.15.self_attn.k_proj
+apm.layers.15.self_attn.v_proj
+apm.layers.15.self_attn.q_proj
+apm.layers.15.self_attn.out_proj
+apm.layers.15.self_attn_layer_norm
+apm.layers.15.activation_fn
+apm.layers.15.fc1
+apm.layers.15.fc2
+apm.layers.15.final_layer_norm
+apm.layers.16
+apm.layers.16.self_attn
+apm.layers.16.self_attn.k_proj
+apm.layers.16.self_attn.v_proj
+apm.layers.16.self_attn.q_proj
+apm.layers.16.self_attn.out_proj
+apm.layers.16.self_attn_layer_norm
+apm.layers.16.activation_fn
+apm.layers.16.fc1
+apm.layers.16.fc2
+apm.layers.16.final_layer_norm
+apm.layers.17
+apm.layers.17.self_attn
+apm.layers.17.self_attn.k_proj
+apm.layers.17.self_attn.v_proj
+apm.layers.17.self_attn.q_proj
+apm.layers.17.self_attn.out_proj
+apm.layers.17.self_attn_layer_norm
+apm.layers.17.activation_fn
+apm.layers.17.fc1
+apm.layers.17.fc2
+apm.layers.17.final_layer_norm
+apm.layers.18
+apm.layers.18.self_attn
+apm.layers.18.self_attn.k_proj
+apm.layers.18.self_attn.v_proj
+apm.layers.18.self_attn.q_proj
+apm.layers.18.self_attn.out_proj
+apm.layers.18.self_attn_layer_norm
+apm.layers.18.activation_fn
+apm.layers.18.fc1
+apm.layers.18.fc2
+apm.layers.18.final_layer_norm
+apm.layers.19
+apm.layers.19.self_attn
+apm.layers.19.self_attn.k_proj
+apm.layers.19.self_attn.v_proj
+apm.layers.19.self_attn.q_proj
+apm.layers.19.self_attn.out_proj
+apm.layers.19.self_attn_layer_norm
+apm.layers.19.activation_fn
+apm.layers.19.fc1
+apm.layers.19.fc2
+apm.layers.19.final_layer_norm
+apm.layers.20
+apm.layers.20.self_attn
+apm.layers.20.self_attn.k_proj
+apm.layers.20.self_attn.v_proj
+apm.layers.20.self_attn.q_proj
+apm.layers.20.self_attn.out_proj
+apm.layers.20.self_attn_layer_norm
+apm.layers.20.activation_fn
+apm.layers.20.fc1
+apm.layers.20.fc2
+apm.layers.20.final_layer_norm
+apm.layers.21
+apm.layers.21.self_attn
+apm.layers.21.self_attn.k_proj
+apm.layers.21.self_attn.v_proj
+apm.layers.21.self_attn.q_proj
+apm.layers.21.self_attn.out_proj
+apm.layers.21.self_attn_layer_norm
+apm.layers.21.activation_fn
+apm.layers.21.fc1
+apm.layers.21.fc2
+apm.layers.21.final_layer_norm
+apm.layers.22
+apm.layers.22.self_attn
+apm.layers.22.self_attn.k_proj
+apm.layers.22.self_attn.v_proj
+apm.layers.22.self_attn.q_proj
+apm.layers.22.self_attn.out_proj
+apm.layers.22.self_attn_layer_norm
+apm.layers.22.activation_fn
+apm.layers.22.fc1
+apm.layers.22.fc2
+apm.layers.22.final_layer_norm
+apm.layers.23
+apm.layers.23.self_attn
+apm.layers.23.self_attn.k_proj
+apm.layers.23.self_attn.v_proj
+apm.layers.23.self_attn.q_proj
+apm.layers.23.self_attn.out_proj
+apm.layers.23.self_attn_layer_norm
+apm.layers.23.activation_fn
+apm.layers.23.fc1
+apm.layers.23.fc2
+apm.layers.23.final_layer_norm
+apm.layer_norm
+audio_avg_pooler
+audio_projection_layer
+audio_projection_layer.linear1
+audio_projection_layer.relu
+audio_projection_layer.linear2
+tts
+tts.projector
+tts.projector.linear1
+tts.projector.relu
+tts.projector.linear2
+tts.emb_code
+tts.emb_code.0
+tts.emb_code.1
+tts.emb_code.2
+tts.emb_code.3
+tts.emb_text
+tts.head_code
+tts.head_code.0
+tts.head_code.0.parametrizations
+tts.head_code.0.parametrizations.weight
+tts.head_code.0.parametrizations.weight.0
+tts.head_code.1
+tts.head_code.1.parametrizations
+tts.head_code.1.parametrizations.weight
+tts.head_code.1.parametrizations.weight.0
+tts.head_code.2
+tts.head_code.2.parametrizations
+tts.head_code.2.parametrizations.weight
+tts.head_code.2.parametrizations.weight.0
+tts.head_code.3
+tts.head_code.3.parametrizations
+tts.head_code.3.parametrizations.weight
+tts.head_code.3.parametrizations.weight.0
+tts.dvae
+tts.dvae.downsample_conv
+tts.dvae.downsample_conv.0
+tts.dvae.downsample_conv.1
+tts.dvae.downsample_conv.2
+tts.dvae.downsample_conv.3
+tts.dvae.encoder
+tts.dvae.encoder.conv_in
+tts.dvae.encoder.conv_in.0
+tts.dvae.encoder.conv_in.1
+tts.dvae.encoder.conv_in.2
+tts.dvae.encoder.decoder_block
+tts.dvae.encoder.decoder_block.0
+tts.dvae.encoder.decoder_block.0.dwconv
+tts.dvae.encoder.decoder_block.0.norm
+tts.dvae.encoder.decoder_block.0.pwconv1
+tts.dvae.encoder.decoder_block.0.act
+tts.dvae.encoder.decoder_block.0.pwconv2
+tts.dvae.encoder.decoder_block.1
+tts.dvae.encoder.decoder_block.1.dwconv
+tts.dvae.encoder.decoder_block.1.norm
+tts.dvae.encoder.decoder_block.1.pwconv1
+tts.dvae.encoder.decoder_block.1.act
+tts.dvae.encoder.decoder_block.1.pwconv2
+tts.dvae.encoder.decoder_block.2
+tts.dvae.encoder.decoder_block.2.dwconv
+tts.dvae.encoder.decoder_block.2.norm
+tts.dvae.encoder.decoder_block.2.pwconv1
+tts.dvae.encoder.decoder_block.2.act
+tts.dvae.encoder.decoder_block.2.pwconv2
+tts.dvae.encoder.decoder_block.3
+tts.dvae.encoder.decoder_block.3.dwconv
+tts.dvae.encoder.decoder_block.3.norm
+tts.dvae.encoder.decoder_block.3.pwconv1
+tts.dvae.encoder.decoder_block.3.act
+tts.dvae.encoder.decoder_block.3.pwconv2
+tts.dvae.encoder.decoder_block.4
+tts.dvae.encoder.decoder_block.4.dwconv
+tts.dvae.encoder.decoder_block.4.norm
+tts.dvae.encoder.decoder_block.4.pwconv1
+tts.dvae.encoder.decoder_block.4.act
+tts.dvae.encoder.decoder_block.4.pwconv2
+tts.dvae.encoder.decoder_block.5
+tts.dvae.encoder.decoder_block.5.dwconv
+tts.dvae.encoder.decoder_block.5.norm
+tts.dvae.encoder.decoder_block.5.pwconv1
+tts.dvae.encoder.decoder_block.5.act
+tts.dvae.encoder.decoder_block.5.pwconv2
+tts.dvae.encoder.decoder_block.6
+tts.dvae.encoder.decoder_block.6.dwconv
+tts.dvae.encoder.decoder_block.6.norm
+tts.dvae.encoder.decoder_block.6.pwconv1
+tts.dvae.encoder.decoder_block.6.act
+tts.dvae.encoder.decoder_block.6.pwconv2
+tts.dvae.encoder.decoder_block.7
+tts.dvae.encoder.decoder_block.7.dwconv
+tts.dvae.encoder.decoder_block.7.norm
+tts.dvae.encoder.decoder_block.7.pwconv1
+tts.dvae.encoder.decoder_block.7.act
+tts.dvae.encoder.decoder_block.7.pwconv2
+tts.dvae.encoder.decoder_block.8
+tts.dvae.encoder.decoder_block.8.dwconv
+tts.dvae.encoder.decoder_block.8.norm
+tts.dvae.encoder.decoder_block.8.pwconv1
+tts.dvae.encoder.decoder_block.8.act
+tts.dvae.encoder.decoder_block.8.pwconv2
+tts.dvae.encoder.decoder_block.9
+tts.dvae.encoder.decoder_block.9.dwconv
+tts.dvae.encoder.decoder_block.9.norm
+tts.dvae.encoder.decoder_block.9.pwconv1
+tts.dvae.encoder.decoder_block.9.act
+tts.dvae.encoder.decoder_block.9.pwconv2
+tts.dvae.encoder.decoder_block.10
+tts.dvae.encoder.decoder_block.10.dwconv
+tts.dvae.encoder.decoder_block.10.norm
+tts.dvae.encoder.decoder_block.10.pwconv1
+tts.dvae.encoder.decoder_block.10.act
+tts.dvae.encoder.decoder_block.10.pwconv2
+tts.dvae.encoder.decoder_block.11
+tts.dvae.encoder.decoder_block.11.dwconv
+tts.dvae.encoder.decoder_block.11.norm
+tts.dvae.encoder.decoder_block.11.pwconv1
+tts.dvae.encoder.decoder_block.11.act
+tts.dvae.encoder.decoder_block.11.pwconv2
+tts.dvae.encoder.conv_out
+tts.dvae.decoder
+tts.dvae.decoder.conv_in
+tts.dvae.decoder.conv_in.0
+tts.dvae.decoder.conv_in.1
+tts.dvae.decoder.conv_in.2
+tts.dvae.decoder.decoder_block
+tts.dvae.decoder.decoder_block.0
+tts.dvae.decoder.decoder_block.0.dwconv
+tts.dvae.decoder.decoder_block.0.norm
+tts.dvae.decoder.decoder_block.0.pwconv1
+tts.dvae.decoder.decoder_block.0.act
+tts.dvae.decoder.decoder_block.0.pwconv2
+tts.dvae.decoder.decoder_block.1
+tts.dvae.decoder.decoder_block.1.dwconv
+tts.dvae.decoder.decoder_block.1.norm
+tts.dvae.decoder.decoder_block.1.pwconv1
+tts.dvae.decoder.decoder_block.1.act
+tts.dvae.decoder.decoder_block.1.pwconv2
+tts.dvae.decoder.decoder_block.2
+tts.dvae.decoder.decoder_block.2.dwconv
+tts.dvae.decoder.decoder_block.2.norm
+tts.dvae.decoder.decoder_block.2.pwconv1
+tts.dvae.decoder.decoder_block.2.act
+tts.dvae.decoder.decoder_block.2.pwconv2
+tts.dvae.decoder.decoder_block.3
+tts.dvae.decoder.decoder_block.3.dwconv
+tts.dvae.decoder.decoder_block.3.norm
+tts.dvae.decoder.decoder_block.3.pwconv1
+tts.dvae.decoder.decoder_block.3.act
+tts.dvae.decoder.decoder_block.3.pwconv2
+tts.dvae.decoder.decoder_block.4
+tts.dvae.decoder.decoder_block.4.dwconv
+tts.dvae.decoder.decoder_block.4.norm
+tts.dvae.decoder.decoder_block.4.pwconv1
+tts.dvae.decoder.decoder_block.4.act
+tts.dvae.decoder.decoder_block.4.pwconv2
+tts.dvae.decoder.decoder_block.5
+tts.dvae.decoder.decoder_block.5.dwconv
+tts.dvae.decoder.decoder_block.5.norm
+tts.dvae.decoder.decoder_block.5.pwconv1
+tts.dvae.decoder.decoder_block.5.act
+tts.dvae.decoder.decoder_block.5.pwconv2
+tts.dvae.decoder.decoder_block.6
+tts.dvae.decoder.decoder_block.6.dwconv
+tts.dvae.decoder.decoder_block.6.norm
+tts.dvae.decoder.decoder_block.6.pwconv1
+tts.dvae.decoder.decoder_block.6.act
+tts.dvae.decoder.decoder_block.6.pwconv2
+tts.dvae.decoder.decoder_block.7
+tts.dvae.decoder.decoder_block.7.dwconv
+tts.dvae.decoder.decoder_block.7.norm
+tts.dvae.decoder.decoder_block.7.pwconv1
+tts.dvae.decoder.decoder_block.7.act
+tts.dvae.decoder.decoder_block.7.pwconv2
+tts.dvae.decoder.decoder_block.8
+tts.dvae.decoder.decoder_block.8.dwconv
+tts.dvae.decoder.decoder_block.8.norm
+tts.dvae.decoder.decoder_block.8.pwconv1
+tts.dvae.decoder.decoder_block.8.act
+tts.dvae.decoder.decoder_block.8.pwconv2
+tts.dvae.decoder.decoder_block.9
+tts.dvae.decoder.decoder_block.9.dwconv
+tts.dvae.decoder.decoder_block.9.norm
+tts.dvae.decoder.decoder_block.9.pwconv1
+tts.dvae.decoder.decoder_block.9.act
+tts.dvae.decoder.decoder_block.9.pwconv2
+tts.dvae.decoder.decoder_block.10
+tts.dvae.decoder.decoder_block.10.dwconv
+tts.dvae.decoder.decoder_block.10.norm
+tts.dvae.decoder.decoder_block.10.pwconv1
+tts.dvae.decoder.decoder_block.10.act
+tts.dvae.decoder.decoder_block.10.pwconv2
+tts.dvae.decoder.decoder_block.11
+tts.dvae.decoder.decoder_block.11.dwconv
+tts.dvae.decoder.decoder_block.11.norm
+tts.dvae.decoder.decoder_block.11.pwconv1
+tts.dvae.decoder.decoder_block.11.act
+tts.dvae.decoder.decoder_block.11.pwconv2
+tts.dvae.decoder.conv_out
+tts.dvae.out_conv
+tts.dvae.vq_layer
+tts.dvae.vq_layer.quantizer
+tts.dvae.vq_layer.quantizer.rvqs
+tts.dvae.vq_layer.quantizer.rvqs.0
+tts.dvae.vq_layer.quantizer.rvqs.0.project_in
+tts.dvae.vq_layer.quantizer.rvqs.0.project_out
+tts.dvae.vq_layer.quantizer.rvqs.0.layers
+tts.dvae.vq_layer.quantizer.rvqs.0.layers.0
+tts.dvae.vq_layer.quantizer.rvqs.0.layers.0.project_in
+tts.dvae.vq_layer.quantizer.rvqs.0.layers.0.project_out
+tts.dvae.vq_layer.quantizer.rvqs.0.layers.1
+tts.dvae.vq_layer.quantizer.rvqs.0.layers.1.project_in
+tts.dvae.vq_layer.quantizer.rvqs.0.layers.1.project_out
+tts.dvae.vq_layer.quantizer.rvqs.1
+tts.dvae.vq_layer.quantizer.rvqs.1.project_in
+tts.dvae.vq_layer.quantizer.rvqs.1.project_out
+tts.dvae.vq_layer.quantizer.rvqs.1.layers
+tts.dvae.vq_layer.quantizer.rvqs.1.layers.0
+tts.dvae.vq_layer.quantizer.rvqs.1.layers.0.project_in
+tts.dvae.vq_layer.quantizer.rvqs.1.layers.0.project_out
+tts.dvae.vq_layer.quantizer.rvqs.1.layers.1
+tts.dvae.vq_layer.quantizer.rvqs.1.layers.1.project_in
+tts.dvae.vq_layer.quantizer.rvqs.1.layers.1.project_out
+tts.model
+tts.model.embed_tokens
+tts.model.layers
+tts.model.layers.0
+tts.model.layers.0.self_attn
+tts.model.layers.0.self_attn.q_proj
+tts.model.layers.0.self_attn.k_proj
+tts.model.layers.0.self_attn.v_proj
+tts.model.layers.0.self_attn.o_proj
+tts.model.layers.0.mlp
+tts.model.layers.0.mlp.gate_proj
+tts.model.layers.0.mlp.up_proj
+tts.model.layers.0.mlp.down_proj
+tts.model.layers.0.mlp.act_fn
+tts.model.layers.0.input_layernorm
+tts.model.layers.0.post_attention_layernorm
+tts.model.layers.1
+tts.model.layers.1.self_attn
+tts.model.layers.1.self_attn.q_proj
+tts.model.layers.1.self_attn.k_proj
+tts.model.layers.1.self_attn.v_proj
+tts.model.layers.1.self_attn.o_proj
+tts.model.layers.1.mlp
+tts.model.layers.1.mlp.gate_proj
+tts.model.layers.1.mlp.up_proj
+tts.model.layers.1.mlp.down_proj
+tts.model.layers.1.mlp.act_fn
+tts.model.layers.1.input_layernorm
+tts.model.layers.1.post_attention_layernorm
+tts.model.layers.2
+tts.model.layers.2.self_attn
+tts.model.layers.2.self_attn.q_proj
+tts.model.layers.2.self_attn.k_proj
+tts.model.layers.2.self_attn.v_proj
+tts.model.layers.2.self_attn.o_proj
+tts.model.layers.2.mlp
+tts.model.layers.2.mlp.gate_proj
+tts.model.layers.2.mlp.up_proj
+tts.model.layers.2.mlp.down_proj
+tts.model.layers.2.mlp.act_fn
+tts.model.layers.2.input_layernorm
+tts.model.layers.2.post_attention_layernorm
+tts.model.layers.3
+tts.model.layers.3.self_attn
+tts.model.layers.3.self_attn.q_proj
+tts.model.layers.3.self_attn.k_proj
+tts.model.layers.3.self_attn.v_proj
+tts.model.layers.3.self_attn.o_proj
+tts.model.layers.3.mlp
+tts.model.layers.3.mlp.gate_proj
+tts.model.layers.3.mlp.up_proj
+tts.model.layers.3.mlp.down_proj
+tts.model.layers.3.mlp.act_fn
+tts.model.layers.3.input_layernorm
+tts.model.layers.3.post_attention_layernorm
+tts.model.layers.4
+tts.model.layers.4.self_attn
+tts.model.layers.4.self_attn.q_proj
+tts.model.layers.4.self_attn.k_proj
+tts.model.layers.4.self_attn.v_proj
+tts.model.layers.4.self_attn.o_proj
+tts.model.layers.4.mlp
+tts.model.layers.4.mlp.gate_proj
+tts.model.layers.4.mlp.up_proj
+tts.model.layers.4.mlp.down_proj
+tts.model.layers.4.mlp.act_fn
+tts.model.layers.4.input_layernorm
+tts.model.layers.4.post_attention_layernorm
+tts.model.layers.5
+tts.model.layers.5.self_attn
+tts.model.layers.5.self_attn.q_proj
+tts.model.layers.5.self_attn.k_proj
+tts.model.layers.5.self_attn.v_proj
+tts.model.layers.5.self_attn.o_proj
+tts.model.layers.5.mlp
+tts.model.layers.5.mlp.gate_proj
+tts.model.layers.5.mlp.up_proj
+tts.model.layers.5.mlp.down_proj
+tts.model.layers.5.mlp.act_fn
+tts.model.layers.5.input_layernorm
+tts.model.layers.5.post_attention_layernorm
+tts.model.layers.6
+tts.model.layers.6.self_attn
+tts.model.layers.6.self_attn.q_proj
+tts.model.layers.6.self_attn.k_proj
+tts.model.layers.6.self_attn.v_proj
+tts.model.layers.6.self_attn.o_proj
+tts.model.layers.6.mlp
+tts.model.layers.6.mlp.gate_proj
+tts.model.layers.6.mlp.up_proj
+tts.model.layers.6.mlp.down_proj
+tts.model.layers.6.mlp.act_fn
+tts.model.layers.6.input_layernorm
+tts.model.layers.6.post_attention_layernorm
+tts.model.layers.7
+tts.model.layers.7.self_attn
+tts.model.layers.7.self_attn.q_proj
+tts.model.layers.7.self_attn.k_proj
+tts.model.layers.7.self_attn.v_proj
+tts.model.layers.7.self_attn.o_proj
+tts.model.layers.7.mlp
+tts.model.layers.7.mlp.gate_proj
+tts.model.layers.7.mlp.up_proj
+tts.model.layers.7.mlp.down_proj
+tts.model.layers.7.mlp.act_fn
+tts.model.layers.7.input_layernorm
+tts.model.layers.7.post_attention_layernorm
+tts.model.layers.8
+tts.model.layers.8.self_attn
+tts.model.layers.8.self_attn.q_proj
+tts.model.layers.8.self_attn.k_proj
+tts.model.layers.8.self_attn.v_proj
+tts.model.layers.8.self_attn.o_proj
+tts.model.layers.8.mlp
+tts.model.layers.8.mlp.gate_proj
+tts.model.layers.8.mlp.up_proj
+tts.model.layers.8.mlp.down_proj
+tts.model.layers.8.mlp.act_fn
+tts.model.layers.8.input_layernorm
+tts.model.layers.8.post_attention_layernorm
+tts.model.layers.9
+tts.model.layers.9.self_attn
+tts.model.layers.9.self_attn.q_proj
+tts.model.layers.9.self_attn.k_proj
+tts.model.layers.9.self_attn.v_proj
+tts.model.layers.9.self_attn.o_proj
+tts.model.layers.9.mlp
+tts.model.layers.9.mlp.gate_proj
+tts.model.layers.9.mlp.up_proj
+tts.model.layers.9.mlp.down_proj
+tts.model.layers.9.mlp.act_fn
+tts.model.layers.9.input_layernorm
+tts.model.layers.9.post_attention_layernorm
+tts.model.layers.10
+tts.model.layers.10.self_attn
+tts.model.layers.10.self_attn.q_proj
+tts.model.layers.10.self_attn.k_proj
+tts.model.layers.10.self_attn.v_proj
+tts.model.layers.10.self_attn.o_proj
+tts.model.layers.10.mlp
+tts.model.layers.10.mlp.gate_proj
+tts.model.layers.10.mlp.up_proj
+tts.model.layers.10.mlp.down_proj
+tts.model.layers.10.mlp.act_fn
+tts.model.layers.10.input_layernorm
+tts.model.layers.10.post_attention_layernorm
+tts.model.layers.11
+tts.model.layers.11.self_attn
+tts.model.layers.11.self_attn.q_proj
+tts.model.layers.11.self_attn.k_proj
+tts.model.layers.11.self_attn.v_proj
+tts.model.layers.11.self_attn.o_proj
+tts.model.layers.11.mlp
+tts.model.layers.11.mlp.gate_proj
+tts.model.layers.11.mlp.up_proj
+tts.model.layers.11.mlp.down_proj
+tts.model.layers.11.mlp.act_fn
+tts.model.layers.11.input_layernorm
+tts.model.layers.11.post_attention_layernorm
+tts.model.layers.12
+tts.model.layers.12.self_attn
+tts.model.layers.12.self_attn.q_proj
+tts.model.layers.12.self_attn.k_proj
+tts.model.layers.12.self_attn.v_proj
+tts.model.layers.12.self_attn.o_proj
+tts.model.layers.12.mlp
+tts.model.layers.12.mlp.gate_proj
+tts.model.layers.12.mlp.up_proj
+tts.model.layers.12.mlp.down_proj
+tts.model.layers.12.mlp.act_fn
+tts.model.layers.12.input_layernorm
+tts.model.layers.12.post_attention_layernorm
+tts.model.layers.13
+tts.model.layers.13.self_attn
+tts.model.layers.13.self_attn.q_proj
+tts.model.layers.13.self_attn.k_proj
+tts.model.layers.13.self_attn.v_proj
+tts.model.layers.13.self_attn.o_proj
+tts.model.layers.13.mlp
+tts.model.layers.13.mlp.gate_proj
+tts.model.layers.13.mlp.up_proj
+tts.model.layers.13.mlp.down_proj
+tts.model.layers.13.mlp.act_fn
+tts.model.layers.13.input_layernorm
+tts.model.layers.13.post_attention_layernorm
+tts.model.layers.14
+tts.model.layers.14.self_attn
+tts.model.layers.14.self_attn.q_proj
+tts.model.layers.14.self_attn.k_proj
+tts.model.layers.14.self_attn.v_proj
+tts.model.layers.14.self_attn.o_proj
+tts.model.layers.14.mlp
+tts.model.layers.14.mlp.gate_proj
+tts.model.layers.14.mlp.up_proj
+tts.model.layers.14.mlp.down_proj
+tts.model.layers.14.mlp.act_fn
+tts.model.layers.14.input_layernorm
+tts.model.layers.14.post_attention_layernorm
+tts.model.layers.15
+tts.model.layers.15.self_attn
+tts.model.layers.15.self_attn.q_proj
+tts.model.layers.15.self_attn.k_proj
+tts.model.layers.15.self_attn.v_proj
+tts.model.layers.15.self_attn.o_proj
+tts.model.layers.15.mlp
+tts.model.layers.15.mlp.gate_proj
+tts.model.layers.15.mlp.up_proj
+tts.model.layers.15.mlp.down_proj
+tts.model.layers.15.mlp.act_fn
+tts.model.layers.15.input_layernorm
+tts.model.layers.15.post_attention_layernorm
+tts.model.layers.16
+tts.model.layers.16.self_attn
+tts.model.layers.16.self_attn.q_proj
+tts.model.layers.16.self_attn.k_proj
+tts.model.layers.16.self_attn.v_proj
+tts.model.layers.16.self_attn.o_proj
+tts.model.layers.16.mlp
+tts.model.layers.16.mlp.gate_proj
+tts.model.layers.16.mlp.up_proj
+tts.model.layers.16.mlp.down_proj
+tts.model.layers.16.mlp.act_fn
+tts.model.layers.16.input_layernorm
+tts.model.layers.16.post_attention_layernorm
+tts.model.layers.17
+tts.model.layers.17.self_attn
+tts.model.layers.17.self_attn.q_proj
+tts.model.layers.17.self_attn.k_proj
+tts.model.layers.17.self_attn.v_proj
+tts.model.layers.17.self_attn.o_proj
+tts.model.layers.17.mlp
+tts.model.layers.17.mlp.gate_proj
+tts.model.layers.17.mlp.up_proj
+tts.model.layers.17.mlp.down_proj
+tts.model.layers.17.mlp.act_fn
+tts.model.layers.17.input_layernorm
+tts.model.layers.17.post_attention_layernorm
+tts.model.layers.18
+tts.model.layers.18.self_attn
+tts.model.layers.18.self_attn.q_proj
+tts.model.layers.18.self_attn.k_proj
+tts.model.layers.18.self_attn.v_proj
+tts.model.layers.18.self_attn.o_proj
+tts.model.layers.18.mlp
+tts.model.layers.18.mlp.gate_proj
+tts.model.layers.18.mlp.up_proj
+tts.model.layers.18.mlp.down_proj
+tts.model.layers.18.mlp.act_fn
+tts.model.layers.18.input_layernorm
+tts.model.layers.18.post_attention_layernorm
+tts.model.layers.19
+tts.model.layers.19.self_attn
+tts.model.layers.19.self_attn.q_proj
+tts.model.layers.19.self_attn.k_proj
+tts.model.layers.19.self_attn.v_proj
+tts.model.layers.19.self_attn.o_proj
+tts.model.layers.19.mlp
+tts.model.layers.19.mlp.gate_proj
+tts.model.layers.19.mlp.up_proj
+tts.model.layers.19.mlp.down_proj
+tts.model.layers.19.mlp.act_fn
+tts.model.layers.19.input_layernorm
+tts.model.layers.19.post_attention_layernorm
+tts.model.norm
+tts.model.rotary_emb
diff --git a/logs/paligemma/paligemma-3b.txt b/logs/paligemma/paligemma-3b.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a36987831de20e01bc5432d5ebb4d530ca6d4422
--- /dev/null
+++ b/logs/paligemma/paligemma-3b.txt
@@ -0,0 +1,575 @@
+vision_tower
+vision_tower.vision_model
+vision_tower.vision_model.embeddings
+vision_tower.vision_model.embeddings.patch_embedding
+vision_tower.vision_model.embeddings.position_embedding
+vision_tower.vision_model.encoder
+vision_tower.vision_model.encoder.layers
+vision_tower.vision_model.encoder.layers.0
+vision_tower.vision_model.encoder.layers.0.layer_norm1
+vision_tower.vision_model.encoder.layers.0.self_attn
+vision_tower.vision_model.encoder.layers.0.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.0.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.0.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.0.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.0.layer_norm2
+vision_tower.vision_model.encoder.layers.0.mlp
+vision_tower.vision_model.encoder.layers.0.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.0.mlp.fc1
+vision_tower.vision_model.encoder.layers.0.mlp.fc2
+vision_tower.vision_model.encoder.layers.1
+vision_tower.vision_model.encoder.layers.1.layer_norm1
+vision_tower.vision_model.encoder.layers.1.self_attn
+vision_tower.vision_model.encoder.layers.1.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.1.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.1.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.1.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.1.layer_norm2
+vision_tower.vision_model.encoder.layers.1.mlp
+vision_tower.vision_model.encoder.layers.1.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.1.mlp.fc1
+vision_tower.vision_model.encoder.layers.1.mlp.fc2
+vision_tower.vision_model.encoder.layers.2
+vision_tower.vision_model.encoder.layers.2.layer_norm1
+vision_tower.vision_model.encoder.layers.2.self_attn
+vision_tower.vision_model.encoder.layers.2.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.2.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.2.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.2.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.2.layer_norm2
+vision_tower.vision_model.encoder.layers.2.mlp
+vision_tower.vision_model.encoder.layers.2.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.2.mlp.fc1
+vision_tower.vision_model.encoder.layers.2.mlp.fc2
+vision_tower.vision_model.encoder.layers.3
+vision_tower.vision_model.encoder.layers.3.layer_norm1
+vision_tower.vision_model.encoder.layers.3.self_attn
+vision_tower.vision_model.encoder.layers.3.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.3.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.3.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.3.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.3.layer_norm2
+vision_tower.vision_model.encoder.layers.3.mlp
+vision_tower.vision_model.encoder.layers.3.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.3.mlp.fc1
+vision_tower.vision_model.encoder.layers.3.mlp.fc2
+vision_tower.vision_model.encoder.layers.4
+vision_tower.vision_model.encoder.layers.4.layer_norm1
+vision_tower.vision_model.encoder.layers.4.self_attn
+vision_tower.vision_model.encoder.layers.4.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.4.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.4.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.4.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.4.layer_norm2
+vision_tower.vision_model.encoder.layers.4.mlp
+vision_tower.vision_model.encoder.layers.4.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.4.mlp.fc1
+vision_tower.vision_model.encoder.layers.4.mlp.fc2
+vision_tower.vision_model.encoder.layers.5
+vision_tower.vision_model.encoder.layers.5.layer_norm1
+vision_tower.vision_model.encoder.layers.5.self_attn
+vision_tower.vision_model.encoder.layers.5.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.5.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.5.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.5.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.5.layer_norm2
+vision_tower.vision_model.encoder.layers.5.mlp
+vision_tower.vision_model.encoder.layers.5.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.5.mlp.fc1
+vision_tower.vision_model.encoder.layers.5.mlp.fc2
+vision_tower.vision_model.encoder.layers.6
+vision_tower.vision_model.encoder.layers.6.layer_norm1
+vision_tower.vision_model.encoder.layers.6.self_attn
+vision_tower.vision_model.encoder.layers.6.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.6.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.6.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.6.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.6.layer_norm2
+vision_tower.vision_model.encoder.layers.6.mlp
+vision_tower.vision_model.encoder.layers.6.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.6.mlp.fc1
+vision_tower.vision_model.encoder.layers.6.mlp.fc2
+vision_tower.vision_model.encoder.layers.7
+vision_tower.vision_model.encoder.layers.7.layer_norm1
+vision_tower.vision_model.encoder.layers.7.self_attn
+vision_tower.vision_model.encoder.layers.7.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.7.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.7.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.7.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.7.layer_norm2
+vision_tower.vision_model.encoder.layers.7.mlp
+vision_tower.vision_model.encoder.layers.7.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.7.mlp.fc1
+vision_tower.vision_model.encoder.layers.7.mlp.fc2
+vision_tower.vision_model.encoder.layers.8
+vision_tower.vision_model.encoder.layers.8.layer_norm1
+vision_tower.vision_model.encoder.layers.8.self_attn
+vision_tower.vision_model.encoder.layers.8.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.8.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.8.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.8.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.8.layer_norm2
+vision_tower.vision_model.encoder.layers.8.mlp
+vision_tower.vision_model.encoder.layers.8.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.8.mlp.fc1
+vision_tower.vision_model.encoder.layers.8.mlp.fc2
+vision_tower.vision_model.encoder.layers.9
+vision_tower.vision_model.encoder.layers.9.layer_norm1
+vision_tower.vision_model.encoder.layers.9.self_attn
+vision_tower.vision_model.encoder.layers.9.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.9.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.9.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.9.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.9.layer_norm2
+vision_tower.vision_model.encoder.layers.9.mlp
+vision_tower.vision_model.encoder.layers.9.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.9.mlp.fc1
+vision_tower.vision_model.encoder.layers.9.mlp.fc2
+vision_tower.vision_model.encoder.layers.10
+vision_tower.vision_model.encoder.layers.10.layer_norm1
+vision_tower.vision_model.encoder.layers.10.self_attn
+vision_tower.vision_model.encoder.layers.10.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.10.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.10.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.10.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.10.layer_norm2
+vision_tower.vision_model.encoder.layers.10.mlp
+vision_tower.vision_model.encoder.layers.10.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.10.mlp.fc1
+vision_tower.vision_model.encoder.layers.10.mlp.fc2
+vision_tower.vision_model.encoder.layers.11
+vision_tower.vision_model.encoder.layers.11.layer_norm1
+vision_tower.vision_model.encoder.layers.11.self_attn
+vision_tower.vision_model.encoder.layers.11.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.11.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.11.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.11.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.11.layer_norm2
+vision_tower.vision_model.encoder.layers.11.mlp
+vision_tower.vision_model.encoder.layers.11.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.11.mlp.fc1
+vision_tower.vision_model.encoder.layers.11.mlp.fc2
+vision_tower.vision_model.encoder.layers.12
+vision_tower.vision_model.encoder.layers.12.layer_norm1
+vision_tower.vision_model.encoder.layers.12.self_attn
+vision_tower.vision_model.encoder.layers.12.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.12.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.12.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.12.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.12.layer_norm2
+vision_tower.vision_model.encoder.layers.12.mlp
+vision_tower.vision_model.encoder.layers.12.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.12.mlp.fc1
+vision_tower.vision_model.encoder.layers.12.mlp.fc2
+vision_tower.vision_model.encoder.layers.13
+vision_tower.vision_model.encoder.layers.13.layer_norm1
+vision_tower.vision_model.encoder.layers.13.self_attn
+vision_tower.vision_model.encoder.layers.13.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.13.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.13.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.13.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.13.layer_norm2
+vision_tower.vision_model.encoder.layers.13.mlp
+vision_tower.vision_model.encoder.layers.13.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.13.mlp.fc1
+vision_tower.vision_model.encoder.layers.13.mlp.fc2
+vision_tower.vision_model.encoder.layers.14
+vision_tower.vision_model.encoder.layers.14.layer_norm1
+vision_tower.vision_model.encoder.layers.14.self_attn
+vision_tower.vision_model.encoder.layers.14.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.14.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.14.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.14.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.14.layer_norm2
+vision_tower.vision_model.encoder.layers.14.mlp
+vision_tower.vision_model.encoder.layers.14.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.14.mlp.fc1
+vision_tower.vision_model.encoder.layers.14.mlp.fc2
+vision_tower.vision_model.encoder.layers.15
+vision_tower.vision_model.encoder.layers.15.layer_norm1
+vision_tower.vision_model.encoder.layers.15.self_attn
+vision_tower.vision_model.encoder.layers.15.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.15.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.15.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.15.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.15.layer_norm2
+vision_tower.vision_model.encoder.layers.15.mlp
+vision_tower.vision_model.encoder.layers.15.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.15.mlp.fc1
+vision_tower.vision_model.encoder.layers.15.mlp.fc2
+vision_tower.vision_model.encoder.layers.16
+vision_tower.vision_model.encoder.layers.16.layer_norm1
+vision_tower.vision_model.encoder.layers.16.self_attn
+vision_tower.vision_model.encoder.layers.16.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.16.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.16.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.16.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.16.layer_norm2
+vision_tower.vision_model.encoder.layers.16.mlp
+vision_tower.vision_model.encoder.layers.16.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.16.mlp.fc1
+vision_tower.vision_model.encoder.layers.16.mlp.fc2
+vision_tower.vision_model.encoder.layers.17
+vision_tower.vision_model.encoder.layers.17.layer_norm1
+vision_tower.vision_model.encoder.layers.17.self_attn
+vision_tower.vision_model.encoder.layers.17.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.17.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.17.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.17.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.17.layer_norm2
+vision_tower.vision_model.encoder.layers.17.mlp
+vision_tower.vision_model.encoder.layers.17.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.17.mlp.fc1
+vision_tower.vision_model.encoder.layers.17.mlp.fc2
+vision_tower.vision_model.encoder.layers.18
+vision_tower.vision_model.encoder.layers.18.layer_norm1
+vision_tower.vision_model.encoder.layers.18.self_attn
+vision_tower.vision_model.encoder.layers.18.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.18.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.18.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.18.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.18.layer_norm2
+vision_tower.vision_model.encoder.layers.18.mlp
+vision_tower.vision_model.encoder.layers.18.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.18.mlp.fc1
+vision_tower.vision_model.encoder.layers.18.mlp.fc2
+vision_tower.vision_model.encoder.layers.19
+vision_tower.vision_model.encoder.layers.19.layer_norm1
+vision_tower.vision_model.encoder.layers.19.self_attn
+vision_tower.vision_model.encoder.layers.19.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.19.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.19.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.19.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.19.layer_norm2
+vision_tower.vision_model.encoder.layers.19.mlp
+vision_tower.vision_model.encoder.layers.19.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.19.mlp.fc1
+vision_tower.vision_model.encoder.layers.19.mlp.fc2
+vision_tower.vision_model.encoder.layers.20
+vision_tower.vision_model.encoder.layers.20.layer_norm1
+vision_tower.vision_model.encoder.layers.20.self_attn
+vision_tower.vision_model.encoder.layers.20.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.20.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.20.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.20.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.20.layer_norm2
+vision_tower.vision_model.encoder.layers.20.mlp
+vision_tower.vision_model.encoder.layers.20.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.20.mlp.fc1
+vision_tower.vision_model.encoder.layers.20.mlp.fc2
+vision_tower.vision_model.encoder.layers.21
+vision_tower.vision_model.encoder.layers.21.layer_norm1
+vision_tower.vision_model.encoder.layers.21.self_attn
+vision_tower.vision_model.encoder.layers.21.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.21.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.21.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.21.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.21.layer_norm2
+vision_tower.vision_model.encoder.layers.21.mlp
+vision_tower.vision_model.encoder.layers.21.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.21.mlp.fc1
+vision_tower.vision_model.encoder.layers.21.mlp.fc2
+vision_tower.vision_model.encoder.layers.22
+vision_tower.vision_model.encoder.layers.22.layer_norm1
+vision_tower.vision_model.encoder.layers.22.self_attn
+vision_tower.vision_model.encoder.layers.22.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.22.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.22.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.22.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.22.layer_norm2
+vision_tower.vision_model.encoder.layers.22.mlp
+vision_tower.vision_model.encoder.layers.22.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.22.mlp.fc1
+vision_tower.vision_model.encoder.layers.22.mlp.fc2
+vision_tower.vision_model.encoder.layers.23
+vision_tower.vision_model.encoder.layers.23.layer_norm1
+vision_tower.vision_model.encoder.layers.23.self_attn
+vision_tower.vision_model.encoder.layers.23.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.23.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.23.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.23.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.23.layer_norm2
+vision_tower.vision_model.encoder.layers.23.mlp
+vision_tower.vision_model.encoder.layers.23.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.23.mlp.fc1
+vision_tower.vision_model.encoder.layers.23.mlp.fc2
+vision_tower.vision_model.encoder.layers.24
+vision_tower.vision_model.encoder.layers.24.layer_norm1
+vision_tower.vision_model.encoder.layers.24.self_attn
+vision_tower.vision_model.encoder.layers.24.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.24.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.24.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.24.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.24.layer_norm2
+vision_tower.vision_model.encoder.layers.24.mlp
+vision_tower.vision_model.encoder.layers.24.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.24.mlp.fc1
+vision_tower.vision_model.encoder.layers.24.mlp.fc2
+vision_tower.vision_model.encoder.layers.25
+vision_tower.vision_model.encoder.layers.25.layer_norm1
+vision_tower.vision_model.encoder.layers.25.self_attn
+vision_tower.vision_model.encoder.layers.25.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.25.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.25.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.25.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.25.layer_norm2
+vision_tower.vision_model.encoder.layers.25.mlp
+vision_tower.vision_model.encoder.layers.25.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.25.mlp.fc1
+vision_tower.vision_model.encoder.layers.25.mlp.fc2
+vision_tower.vision_model.encoder.layers.26
+vision_tower.vision_model.encoder.layers.26.layer_norm1
+vision_tower.vision_model.encoder.layers.26.self_attn
+vision_tower.vision_model.encoder.layers.26.self_attn.k_proj
+vision_tower.vision_model.encoder.layers.26.self_attn.v_proj
+vision_tower.vision_model.encoder.layers.26.self_attn.q_proj
+vision_tower.vision_model.encoder.layers.26.self_attn.out_proj
+vision_tower.vision_model.encoder.layers.26.layer_norm2
+vision_tower.vision_model.encoder.layers.26.mlp
+vision_tower.vision_model.encoder.layers.26.mlp.activation_fn
+vision_tower.vision_model.encoder.layers.26.mlp.fc1
+vision_tower.vision_model.encoder.layers.26.mlp.fc2
+vision_tower.vision_model.post_layernorm
+multi_modal_projector
+multi_modal_projector.linear
+language_model
+language_model.model
+language_model.model.embed_tokens
+language_model.model.layers
+language_model.model.layers.0
+language_model.model.layers.0.self_attn
+language_model.model.layers.0.self_attn.q_proj
+language_model.model.layers.0.self_attn.k_proj
+language_model.model.layers.0.self_attn.v_proj
+language_model.model.layers.0.self_attn.o_proj
+language_model.model.layers.0.mlp
+language_model.model.layers.0.mlp.gate_proj
+language_model.model.layers.0.mlp.up_proj
+language_model.model.layers.0.mlp.down_proj
+language_model.model.layers.0.mlp.act_fn
+language_model.model.layers.0.input_layernorm
+language_model.model.layers.0.post_attention_layernorm
+language_model.model.layers.1
+language_model.model.layers.1.self_attn
+language_model.model.layers.1.self_attn.q_proj
+language_model.model.layers.1.self_attn.k_proj
+language_model.model.layers.1.self_attn.v_proj
+language_model.model.layers.1.self_attn.o_proj
+language_model.model.layers.1.mlp
+language_model.model.layers.1.mlp.gate_proj
+language_model.model.layers.1.mlp.up_proj
+language_model.model.layers.1.mlp.down_proj
+language_model.model.layers.1.mlp.act_fn
+language_model.model.layers.1.input_layernorm
+language_model.model.layers.1.post_attention_layernorm
+language_model.model.layers.2
+language_model.model.layers.2.self_attn
+language_model.model.layers.2.self_attn.q_proj
+language_model.model.layers.2.self_attn.k_proj
+language_model.model.layers.2.self_attn.v_proj
+language_model.model.layers.2.self_attn.o_proj
+language_model.model.layers.2.mlp
+language_model.model.layers.2.mlp.gate_proj
+language_model.model.layers.2.mlp.up_proj
+language_model.model.layers.2.mlp.down_proj
+language_model.model.layers.2.mlp.act_fn
+language_model.model.layers.2.input_layernorm
+language_model.model.layers.2.post_attention_layernorm
+language_model.model.layers.3
+language_model.model.layers.3.self_attn
+language_model.model.layers.3.self_attn.q_proj
+language_model.model.layers.3.self_attn.k_proj
+language_model.model.layers.3.self_attn.v_proj
+language_model.model.layers.3.self_attn.o_proj
+language_model.model.layers.3.mlp
+language_model.model.layers.3.mlp.gate_proj
+language_model.model.layers.3.mlp.up_proj
+language_model.model.layers.3.mlp.down_proj
+language_model.model.layers.3.mlp.act_fn
+language_model.model.layers.3.input_layernorm
+language_model.model.layers.3.post_attention_layernorm
+language_model.model.layers.4
+language_model.model.layers.4.self_attn
+language_model.model.layers.4.self_attn.q_proj
+language_model.model.layers.4.self_attn.k_proj
+language_model.model.layers.4.self_attn.v_proj
+language_model.model.layers.4.self_attn.o_proj
+language_model.model.layers.4.mlp
+language_model.model.layers.4.mlp.gate_proj
+language_model.model.layers.4.mlp.up_proj
+language_model.model.layers.4.mlp.down_proj
+language_model.model.layers.4.mlp.act_fn
+language_model.model.layers.4.input_layernorm
+language_model.model.layers.4.post_attention_layernorm
+language_model.model.layers.5
+language_model.model.layers.5.self_attn
+language_model.model.layers.5.self_attn.q_proj
+language_model.model.layers.5.self_attn.k_proj
+language_model.model.layers.5.self_attn.v_proj
+language_model.model.layers.5.self_attn.o_proj
+language_model.model.layers.5.mlp
+language_model.model.layers.5.mlp.gate_proj
+language_model.model.layers.5.mlp.up_proj
+language_model.model.layers.5.mlp.down_proj
+language_model.model.layers.5.mlp.act_fn
+language_model.model.layers.5.input_layernorm
+language_model.model.layers.5.post_attention_layernorm
+language_model.model.layers.6
+language_model.model.layers.6.self_attn
+language_model.model.layers.6.self_attn.q_proj
+language_model.model.layers.6.self_attn.k_proj
+language_model.model.layers.6.self_attn.v_proj
+language_model.model.layers.6.self_attn.o_proj
+language_model.model.layers.6.mlp
+language_model.model.layers.6.mlp.gate_proj
+language_model.model.layers.6.mlp.up_proj
+language_model.model.layers.6.mlp.down_proj
+language_model.model.layers.6.mlp.act_fn
+language_model.model.layers.6.input_layernorm
+language_model.model.layers.6.post_attention_layernorm
+language_model.model.layers.7
+language_model.model.layers.7.self_attn
+language_model.model.layers.7.self_attn.q_proj
+language_model.model.layers.7.self_attn.k_proj
+language_model.model.layers.7.self_attn.v_proj
+language_model.model.layers.7.self_attn.o_proj
+language_model.model.layers.7.mlp
+language_model.model.layers.7.mlp.gate_proj
+language_model.model.layers.7.mlp.up_proj
+language_model.model.layers.7.mlp.down_proj
+language_model.model.layers.7.mlp.act_fn
+language_model.model.layers.7.input_layernorm
+language_model.model.layers.7.post_attention_layernorm
+language_model.model.layers.8
+language_model.model.layers.8.self_attn
+language_model.model.layers.8.self_attn.q_proj
+language_model.model.layers.8.self_attn.k_proj
+language_model.model.layers.8.self_attn.v_proj
+language_model.model.layers.8.self_attn.o_proj
+language_model.model.layers.8.mlp
+language_model.model.layers.8.mlp.gate_proj
+language_model.model.layers.8.mlp.up_proj
+language_model.model.layers.8.mlp.down_proj
+language_model.model.layers.8.mlp.act_fn
+language_model.model.layers.8.input_layernorm
+language_model.model.layers.8.post_attention_layernorm
+language_model.model.layers.9
+language_model.model.layers.9.self_attn
+language_model.model.layers.9.self_attn.q_proj
+language_model.model.layers.9.self_attn.k_proj
+language_model.model.layers.9.self_attn.v_proj
+language_model.model.layers.9.self_attn.o_proj
+language_model.model.layers.9.mlp
+language_model.model.layers.9.mlp.gate_proj
+language_model.model.layers.9.mlp.up_proj
+language_model.model.layers.9.mlp.down_proj
+language_model.model.layers.9.mlp.act_fn
+language_model.model.layers.9.input_layernorm
+language_model.model.layers.9.post_attention_layernorm
+language_model.model.layers.10
+language_model.model.layers.10.self_attn
+language_model.model.layers.10.self_attn.q_proj
+language_model.model.layers.10.self_attn.k_proj
+language_model.model.layers.10.self_attn.v_proj
+language_model.model.layers.10.self_attn.o_proj
+language_model.model.layers.10.mlp
+language_model.model.layers.10.mlp.gate_proj
+language_model.model.layers.10.mlp.up_proj
+language_model.model.layers.10.mlp.down_proj
+language_model.model.layers.10.mlp.act_fn
+language_model.model.layers.10.input_layernorm
+language_model.model.layers.10.post_attention_layernorm
+language_model.model.layers.11
+language_model.model.layers.11.self_attn
+language_model.model.layers.11.self_attn.q_proj
+language_model.model.layers.11.self_attn.k_proj
+language_model.model.layers.11.self_attn.v_proj
+language_model.model.layers.11.self_attn.o_proj
+language_model.model.layers.11.mlp
+language_model.model.layers.11.mlp.gate_proj
+language_model.model.layers.11.mlp.up_proj
+language_model.model.layers.11.mlp.down_proj
+language_model.model.layers.11.mlp.act_fn
+language_model.model.layers.11.input_layernorm
+language_model.model.layers.11.post_attention_layernorm
+language_model.model.layers.12
+language_model.model.layers.12.self_attn
+language_model.model.layers.12.self_attn.q_proj
+language_model.model.layers.12.self_attn.k_proj
+language_model.model.layers.12.self_attn.v_proj
+language_model.model.layers.12.self_attn.o_proj
+language_model.model.layers.12.mlp
+language_model.model.layers.12.mlp.gate_proj
+language_model.model.layers.12.mlp.up_proj
+language_model.model.layers.12.mlp.down_proj
+language_model.model.layers.12.mlp.act_fn
+language_model.model.layers.12.input_layernorm
+language_model.model.layers.12.post_attention_layernorm
+language_model.model.layers.13
+language_model.model.layers.13.self_attn
+language_model.model.layers.13.self_attn.q_proj
+language_model.model.layers.13.self_attn.k_proj
+language_model.model.layers.13.self_attn.v_proj
+language_model.model.layers.13.self_attn.o_proj
+language_model.model.layers.13.mlp
+language_model.model.layers.13.mlp.gate_proj
+language_model.model.layers.13.mlp.up_proj
+language_model.model.layers.13.mlp.down_proj
+language_model.model.layers.13.mlp.act_fn
+language_model.model.layers.13.input_layernorm
+language_model.model.layers.13.post_attention_layernorm
+language_model.model.layers.14
+language_model.model.layers.14.self_attn
+language_model.model.layers.14.self_attn.q_proj
+language_model.model.layers.14.self_attn.k_proj
+language_model.model.layers.14.self_attn.v_proj
+language_model.model.layers.14.self_attn.o_proj
+language_model.model.layers.14.mlp
+language_model.model.layers.14.mlp.gate_proj
+language_model.model.layers.14.mlp.up_proj
+language_model.model.layers.14.mlp.down_proj
+language_model.model.layers.14.mlp.act_fn
+language_model.model.layers.14.input_layernorm
+language_model.model.layers.14.post_attention_layernorm
+language_model.model.layers.15
+language_model.model.layers.15.self_attn
+language_model.model.layers.15.self_attn.q_proj
+language_model.model.layers.15.self_attn.k_proj
+language_model.model.layers.15.self_attn.v_proj
+language_model.model.layers.15.self_attn.o_proj
+language_model.model.layers.15.mlp
+language_model.model.layers.15.mlp.gate_proj
+language_model.model.layers.15.mlp.up_proj
+language_model.model.layers.15.mlp.down_proj
+language_model.model.layers.15.mlp.act_fn
+language_model.model.layers.15.input_layernorm
+language_model.model.layers.15.post_attention_layernorm
+language_model.model.layers.16
+language_model.model.layers.16.self_attn
+language_model.model.layers.16.self_attn.q_proj
+language_model.model.layers.16.self_attn.k_proj
+language_model.model.layers.16.self_attn.v_proj
+language_model.model.layers.16.self_attn.o_proj
+language_model.model.layers.16.mlp
+language_model.model.layers.16.mlp.gate_proj
+language_model.model.layers.16.mlp.up_proj
+language_model.model.layers.16.mlp.down_proj
+language_model.model.layers.16.mlp.act_fn
+language_model.model.layers.16.input_layernorm
+language_model.model.layers.16.post_attention_layernorm
+language_model.model.layers.17
+language_model.model.layers.17.self_attn
+language_model.model.layers.17.self_attn.q_proj
+language_model.model.layers.17.self_attn.k_proj
+language_model.model.layers.17.self_attn.v_proj
+language_model.model.layers.17.self_attn.o_proj
+language_model.model.layers.17.mlp
+language_model.model.layers.17.mlp.gate_proj
+language_model.model.layers.17.mlp.up_proj
+language_model.model.layers.17.mlp.down_proj
+language_model.model.layers.17.mlp.act_fn
+language_model.model.layers.17.input_layernorm
+language_model.model.layers.17.post_attention_layernorm
+language_model.model.norm
+language_model.model.rotary_emb
+language_model.lm_head
diff --git a/logs/wonderwind271/MiniCPM-V-2.txt b/logs/wonderwind271/MiniCPM-V-2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..918e19d7954ffb0a98a3ec6206cf76c169401aa5
--- /dev/null
+++ b/logs/wonderwind271/MiniCPM-V-2.txt
@@ -0,0 +1,1133 @@
+
+llm
+llm.model
+llm.model.embed_tokens
+llm.model.layers
+llm.model.layers.0
+llm.model.layers.0.self_attn
+llm.model.layers.0.self_attn.q_proj
+llm.model.layers.0.self_attn.k_proj
+llm.model.layers.0.self_attn.v_proj
+llm.model.layers.0.self_attn.o_proj
+llm.model.layers.0.self_attn.rotary_emb
+llm.model.layers.0.mlp
+llm.model.layers.0.mlp.gate_proj
+llm.model.layers.0.mlp.up_proj
+llm.model.layers.0.mlp.down_proj
+llm.model.layers.0.mlp.act_fn
+llm.model.layers.0.input_layernorm
+llm.model.layers.0.post_attention_layernorm
+llm.model.layers.1
+llm.model.layers.1.self_attn
+llm.model.layers.1.self_attn.q_proj
+llm.model.layers.1.self_attn.k_proj
+llm.model.layers.1.self_attn.v_proj
+llm.model.layers.1.self_attn.o_proj
+llm.model.layers.1.self_attn.rotary_emb
+llm.model.layers.1.mlp
+llm.model.layers.1.mlp.gate_proj
+llm.model.layers.1.mlp.up_proj
+llm.model.layers.1.mlp.down_proj
+llm.model.layers.1.mlp.act_fn
+llm.model.layers.1.input_layernorm
+llm.model.layers.1.post_attention_layernorm
+llm.model.layers.2
+llm.model.layers.2.self_attn
+llm.model.layers.2.self_attn.q_proj
+llm.model.layers.2.self_attn.k_proj
+llm.model.layers.2.self_attn.v_proj
+llm.model.layers.2.self_attn.o_proj
+llm.model.layers.2.self_attn.rotary_emb
+llm.model.layers.2.mlp
+llm.model.layers.2.mlp.gate_proj
+llm.model.layers.2.mlp.up_proj
+llm.model.layers.2.mlp.down_proj
+llm.model.layers.2.mlp.act_fn
+llm.model.layers.2.input_layernorm
+llm.model.layers.2.post_attention_layernorm
+llm.model.layers.3
+llm.model.layers.3.self_attn
+llm.model.layers.3.self_attn.q_proj
+llm.model.layers.3.self_attn.k_proj
+llm.model.layers.3.self_attn.v_proj
+llm.model.layers.3.self_attn.o_proj
+llm.model.layers.3.self_attn.rotary_emb
+llm.model.layers.3.mlp
+llm.model.layers.3.mlp.gate_proj
+llm.model.layers.3.mlp.up_proj
+llm.model.layers.3.mlp.down_proj
+llm.model.layers.3.mlp.act_fn
+llm.model.layers.3.input_layernorm
+llm.model.layers.3.post_attention_layernorm
+llm.model.layers.4
+llm.model.layers.4.self_attn
+llm.model.layers.4.self_attn.q_proj
+llm.model.layers.4.self_attn.k_proj
+llm.model.layers.4.self_attn.v_proj
+llm.model.layers.4.self_attn.o_proj
+llm.model.layers.4.self_attn.rotary_emb
+llm.model.layers.4.mlp
+llm.model.layers.4.mlp.gate_proj
+llm.model.layers.4.mlp.up_proj
+llm.model.layers.4.mlp.down_proj
+llm.model.layers.4.mlp.act_fn
+llm.model.layers.4.input_layernorm
+llm.model.layers.4.post_attention_layernorm
+llm.model.layers.5
+llm.model.layers.5.self_attn
+llm.model.layers.5.self_attn.q_proj
+llm.model.layers.5.self_attn.k_proj
+llm.model.layers.5.self_attn.v_proj
+llm.model.layers.5.self_attn.o_proj
+llm.model.layers.5.self_attn.rotary_emb
+llm.model.layers.5.mlp
+llm.model.layers.5.mlp.gate_proj
+llm.model.layers.5.mlp.up_proj
+llm.model.layers.5.mlp.down_proj
+llm.model.layers.5.mlp.act_fn
+llm.model.layers.5.input_layernorm
+llm.model.layers.5.post_attention_layernorm
+llm.model.layers.6
+llm.model.layers.6.self_attn
+llm.model.layers.6.self_attn.q_proj
+llm.model.layers.6.self_attn.k_proj
+llm.model.layers.6.self_attn.v_proj
+llm.model.layers.6.self_attn.o_proj
+llm.model.layers.6.self_attn.rotary_emb
+llm.model.layers.6.mlp
+llm.model.layers.6.mlp.gate_proj
+llm.model.layers.6.mlp.up_proj
+llm.model.layers.6.mlp.down_proj
+llm.model.layers.6.mlp.act_fn
+llm.model.layers.6.input_layernorm
+llm.model.layers.6.post_attention_layernorm
+llm.model.layers.7
+llm.model.layers.7.self_attn
+llm.model.layers.7.self_attn.q_proj
+llm.model.layers.7.self_attn.k_proj
+llm.model.layers.7.self_attn.v_proj
+llm.model.layers.7.self_attn.o_proj
+llm.model.layers.7.self_attn.rotary_emb
+llm.model.layers.7.mlp
+llm.model.layers.7.mlp.gate_proj
+llm.model.layers.7.mlp.up_proj
+llm.model.layers.7.mlp.down_proj
+llm.model.layers.7.mlp.act_fn
+llm.model.layers.7.input_layernorm
+llm.model.layers.7.post_attention_layernorm
+llm.model.layers.8
+llm.model.layers.8.self_attn
+llm.model.layers.8.self_attn.q_proj
+llm.model.layers.8.self_attn.k_proj
+llm.model.layers.8.self_attn.v_proj
+llm.model.layers.8.self_attn.o_proj
+llm.model.layers.8.self_attn.rotary_emb
+llm.model.layers.8.mlp
+llm.model.layers.8.mlp.gate_proj
+llm.model.layers.8.mlp.up_proj
+llm.model.layers.8.mlp.down_proj
+llm.model.layers.8.mlp.act_fn
+llm.model.layers.8.input_layernorm
+llm.model.layers.8.post_attention_layernorm
+llm.model.layers.9
+llm.model.layers.9.self_attn
+llm.model.layers.9.self_attn.q_proj
+llm.model.layers.9.self_attn.k_proj
+llm.model.layers.9.self_attn.v_proj
+llm.model.layers.9.self_attn.o_proj
+llm.model.layers.9.self_attn.rotary_emb
+llm.model.layers.9.mlp
+llm.model.layers.9.mlp.gate_proj
+llm.model.layers.9.mlp.up_proj
+llm.model.layers.9.mlp.down_proj
+llm.model.layers.9.mlp.act_fn
+llm.model.layers.9.input_layernorm
+llm.model.layers.9.post_attention_layernorm
+llm.model.layers.10
+llm.model.layers.10.self_attn
+llm.model.layers.10.self_attn.q_proj
+llm.model.layers.10.self_attn.k_proj
+llm.model.layers.10.self_attn.v_proj
+llm.model.layers.10.self_attn.o_proj
+llm.model.layers.10.self_attn.rotary_emb
+llm.model.layers.10.mlp
+llm.model.layers.10.mlp.gate_proj
+llm.model.layers.10.mlp.up_proj
+llm.model.layers.10.mlp.down_proj
+llm.model.layers.10.mlp.act_fn
+llm.model.layers.10.input_layernorm
+llm.model.layers.10.post_attention_layernorm
+llm.model.layers.11
+llm.model.layers.11.self_attn
+llm.model.layers.11.self_attn.q_proj
+llm.model.layers.11.self_attn.k_proj
+llm.model.layers.11.self_attn.v_proj
+llm.model.layers.11.self_attn.o_proj
+llm.model.layers.11.self_attn.rotary_emb
+llm.model.layers.11.mlp
+llm.model.layers.11.mlp.gate_proj
+llm.model.layers.11.mlp.up_proj
+llm.model.layers.11.mlp.down_proj
+llm.model.layers.11.mlp.act_fn
+llm.model.layers.11.input_layernorm
+llm.model.layers.11.post_attention_layernorm
+llm.model.layers.12
+llm.model.layers.12.self_attn
+llm.model.layers.12.self_attn.q_proj
+llm.model.layers.12.self_attn.k_proj
+llm.model.layers.12.self_attn.v_proj
+llm.model.layers.12.self_attn.o_proj
+llm.model.layers.12.self_attn.rotary_emb
+llm.model.layers.12.mlp
+llm.model.layers.12.mlp.gate_proj
+llm.model.layers.12.mlp.up_proj
+llm.model.layers.12.mlp.down_proj
+llm.model.layers.12.mlp.act_fn
+llm.model.layers.12.input_layernorm
+llm.model.layers.12.post_attention_layernorm
+llm.model.layers.13
+llm.model.layers.13.self_attn
+llm.model.layers.13.self_attn.q_proj
+llm.model.layers.13.self_attn.k_proj
+llm.model.layers.13.self_attn.v_proj
+llm.model.layers.13.self_attn.o_proj
+llm.model.layers.13.self_attn.rotary_emb
+llm.model.layers.13.mlp
+llm.model.layers.13.mlp.gate_proj
+llm.model.layers.13.mlp.up_proj
+llm.model.layers.13.mlp.down_proj
+llm.model.layers.13.mlp.act_fn
+llm.model.layers.13.input_layernorm
+llm.model.layers.13.post_attention_layernorm
+llm.model.layers.14
+llm.model.layers.14.self_attn
+llm.model.layers.14.self_attn.q_proj
+llm.model.layers.14.self_attn.k_proj
+llm.model.layers.14.self_attn.v_proj
+llm.model.layers.14.self_attn.o_proj
+llm.model.layers.14.self_attn.rotary_emb
+llm.model.layers.14.mlp
+llm.model.layers.14.mlp.gate_proj
+llm.model.layers.14.mlp.up_proj
+llm.model.layers.14.mlp.down_proj
+llm.model.layers.14.mlp.act_fn
+llm.model.layers.14.input_layernorm
+llm.model.layers.14.post_attention_layernorm
+llm.model.layers.15
+llm.model.layers.15.self_attn
+llm.model.layers.15.self_attn.q_proj
+llm.model.layers.15.self_attn.k_proj
+llm.model.layers.15.self_attn.v_proj
+llm.model.layers.15.self_attn.o_proj
+llm.model.layers.15.self_attn.rotary_emb
+llm.model.layers.15.mlp
+llm.model.layers.15.mlp.gate_proj
+llm.model.layers.15.mlp.up_proj
+llm.model.layers.15.mlp.down_proj
+llm.model.layers.15.mlp.act_fn
+llm.model.layers.15.input_layernorm
+llm.model.layers.15.post_attention_layernorm
+llm.model.layers.16
+llm.model.layers.16.self_attn
+llm.model.layers.16.self_attn.q_proj
+llm.model.layers.16.self_attn.k_proj
+llm.model.layers.16.self_attn.v_proj
+llm.model.layers.16.self_attn.o_proj
+llm.model.layers.16.self_attn.rotary_emb
+llm.model.layers.16.mlp
+llm.model.layers.16.mlp.gate_proj
+llm.model.layers.16.mlp.up_proj
+llm.model.layers.16.mlp.down_proj
+llm.model.layers.16.mlp.act_fn
+llm.model.layers.16.input_layernorm
+llm.model.layers.16.post_attention_layernorm
+llm.model.layers.17
+llm.model.layers.17.self_attn
+llm.model.layers.17.self_attn.q_proj
+llm.model.layers.17.self_attn.k_proj
+llm.model.layers.17.self_attn.v_proj
+llm.model.layers.17.self_attn.o_proj
+llm.model.layers.17.self_attn.rotary_emb
+llm.model.layers.17.mlp
+llm.model.layers.17.mlp.gate_proj
+llm.model.layers.17.mlp.up_proj
+llm.model.layers.17.mlp.down_proj
+llm.model.layers.17.mlp.act_fn
+llm.model.layers.17.input_layernorm
+llm.model.layers.17.post_attention_layernorm
+llm.model.layers.18
+llm.model.layers.18.self_attn
+llm.model.layers.18.self_attn.q_proj
+llm.model.layers.18.self_attn.k_proj
+llm.model.layers.18.self_attn.v_proj
+llm.model.layers.18.self_attn.o_proj
+llm.model.layers.18.self_attn.rotary_emb
+llm.model.layers.18.mlp
+llm.model.layers.18.mlp.gate_proj
+llm.model.layers.18.mlp.up_proj
+llm.model.layers.18.mlp.down_proj
+llm.model.layers.18.mlp.act_fn
+llm.model.layers.18.input_layernorm
+llm.model.layers.18.post_attention_layernorm
+llm.model.layers.19
+llm.model.layers.19.self_attn
+llm.model.layers.19.self_attn.q_proj
+llm.model.layers.19.self_attn.k_proj
+llm.model.layers.19.self_attn.v_proj
+llm.model.layers.19.self_attn.o_proj
+llm.model.layers.19.self_attn.rotary_emb
+llm.model.layers.19.mlp
+llm.model.layers.19.mlp.gate_proj
+llm.model.layers.19.mlp.up_proj
+llm.model.layers.19.mlp.down_proj
+llm.model.layers.19.mlp.act_fn
+llm.model.layers.19.input_layernorm
+llm.model.layers.19.post_attention_layernorm
+llm.model.layers.20
+llm.model.layers.20.self_attn
+llm.model.layers.20.self_attn.q_proj
+llm.model.layers.20.self_attn.k_proj
+llm.model.layers.20.self_attn.v_proj
+llm.model.layers.20.self_attn.o_proj
+llm.model.layers.20.self_attn.rotary_emb
+llm.model.layers.20.mlp
+llm.model.layers.20.mlp.gate_proj
+llm.model.layers.20.mlp.up_proj
+llm.model.layers.20.mlp.down_proj
+llm.model.layers.20.mlp.act_fn
+llm.model.layers.20.input_layernorm
+llm.model.layers.20.post_attention_layernorm
+llm.model.layers.21
+llm.model.layers.21.self_attn
+llm.model.layers.21.self_attn.q_proj
+llm.model.layers.21.self_attn.k_proj
+llm.model.layers.21.self_attn.v_proj
+llm.model.layers.21.self_attn.o_proj
+llm.model.layers.21.self_attn.rotary_emb
+llm.model.layers.21.mlp
+llm.model.layers.21.mlp.gate_proj
+llm.model.layers.21.mlp.up_proj
+llm.model.layers.21.mlp.down_proj
+llm.model.layers.21.mlp.act_fn
+llm.model.layers.21.input_layernorm
+llm.model.layers.21.post_attention_layernorm
+llm.model.layers.22
+llm.model.layers.22.self_attn
+llm.model.layers.22.self_attn.q_proj
+llm.model.layers.22.self_attn.k_proj
+llm.model.layers.22.self_attn.v_proj
+llm.model.layers.22.self_attn.o_proj
+llm.model.layers.22.self_attn.rotary_emb
+llm.model.layers.22.mlp
+llm.model.layers.22.mlp.gate_proj
+llm.model.layers.22.mlp.up_proj
+llm.model.layers.22.mlp.down_proj
+llm.model.layers.22.mlp.act_fn
+llm.model.layers.22.input_layernorm
+llm.model.layers.22.post_attention_layernorm
+llm.model.layers.23
+llm.model.layers.23.self_attn
+llm.model.layers.23.self_attn.q_proj
+llm.model.layers.23.self_attn.k_proj
+llm.model.layers.23.self_attn.v_proj
+llm.model.layers.23.self_attn.o_proj
+llm.model.layers.23.self_attn.rotary_emb
+llm.model.layers.23.mlp
+llm.model.layers.23.mlp.gate_proj
+llm.model.layers.23.mlp.up_proj
+llm.model.layers.23.mlp.down_proj
+llm.model.layers.23.mlp.act_fn
+llm.model.layers.23.input_layernorm
+llm.model.layers.23.post_attention_layernorm
+llm.model.layers.24
+llm.model.layers.24.self_attn
+llm.model.layers.24.self_attn.q_proj
+llm.model.layers.24.self_attn.k_proj
+llm.model.layers.24.self_attn.v_proj
+llm.model.layers.24.self_attn.o_proj
+llm.model.layers.24.self_attn.rotary_emb
+llm.model.layers.24.mlp
+llm.model.layers.24.mlp.gate_proj
+llm.model.layers.24.mlp.up_proj
+llm.model.layers.24.mlp.down_proj
+llm.model.layers.24.mlp.act_fn
+llm.model.layers.24.input_layernorm
+llm.model.layers.24.post_attention_layernorm
+llm.model.layers.25
+llm.model.layers.25.self_attn
+llm.model.layers.25.self_attn.q_proj
+llm.model.layers.25.self_attn.k_proj
+llm.model.layers.25.self_attn.v_proj
+llm.model.layers.25.self_attn.o_proj
+llm.model.layers.25.self_attn.rotary_emb
+llm.model.layers.25.mlp
+llm.model.layers.25.mlp.gate_proj
+llm.model.layers.25.mlp.up_proj
+llm.model.layers.25.mlp.down_proj
+llm.model.layers.25.mlp.act_fn
+llm.model.layers.25.input_layernorm
+llm.model.layers.25.post_attention_layernorm
+llm.model.layers.26
+llm.model.layers.26.self_attn
+llm.model.layers.26.self_attn.q_proj
+llm.model.layers.26.self_attn.k_proj
+llm.model.layers.26.self_attn.v_proj
+llm.model.layers.26.self_attn.o_proj
+llm.model.layers.26.self_attn.rotary_emb
+llm.model.layers.26.mlp
+llm.model.layers.26.mlp.gate_proj
+llm.model.layers.26.mlp.up_proj
+llm.model.layers.26.mlp.down_proj
+llm.model.layers.26.mlp.act_fn
+llm.model.layers.26.input_layernorm
+llm.model.layers.26.post_attention_layernorm
+llm.model.layers.27
+llm.model.layers.27.self_attn
+llm.model.layers.27.self_attn.q_proj
+llm.model.layers.27.self_attn.k_proj
+llm.model.layers.27.self_attn.v_proj
+llm.model.layers.27.self_attn.o_proj
+llm.model.layers.27.self_attn.rotary_emb
+llm.model.layers.27.mlp
+llm.model.layers.27.mlp.gate_proj
+llm.model.layers.27.mlp.up_proj
+llm.model.layers.27.mlp.down_proj
+llm.model.layers.27.mlp.act_fn
+llm.model.layers.27.input_layernorm
+llm.model.layers.27.post_attention_layernorm
+llm.model.layers.28
+llm.model.layers.28.self_attn
+llm.model.layers.28.self_attn.q_proj
+llm.model.layers.28.self_attn.k_proj
+llm.model.layers.28.self_attn.v_proj
+llm.model.layers.28.self_attn.o_proj
+llm.model.layers.28.self_attn.rotary_emb
+llm.model.layers.28.mlp
+llm.model.layers.28.mlp.gate_proj
+llm.model.layers.28.mlp.up_proj
+llm.model.layers.28.mlp.down_proj
+llm.model.layers.28.mlp.act_fn
+llm.model.layers.28.input_layernorm
+llm.model.layers.28.post_attention_layernorm
+llm.model.layers.29
+llm.model.layers.29.self_attn
+llm.model.layers.29.self_attn.q_proj
+llm.model.layers.29.self_attn.k_proj
+llm.model.layers.29.self_attn.v_proj
+llm.model.layers.29.self_attn.o_proj
+llm.model.layers.29.self_attn.rotary_emb
+llm.model.layers.29.mlp
+llm.model.layers.29.mlp.gate_proj
+llm.model.layers.29.mlp.up_proj
+llm.model.layers.29.mlp.down_proj
+llm.model.layers.29.mlp.act_fn
+llm.model.layers.29.input_layernorm
+llm.model.layers.29.post_attention_layernorm
+llm.model.layers.30
+llm.model.layers.30.self_attn
+llm.model.layers.30.self_attn.q_proj
+llm.model.layers.30.self_attn.k_proj
+llm.model.layers.30.self_attn.v_proj
+llm.model.layers.30.self_attn.o_proj
+llm.model.layers.30.self_attn.rotary_emb
+llm.model.layers.30.mlp
+llm.model.layers.30.mlp.gate_proj
+llm.model.layers.30.mlp.up_proj
+llm.model.layers.30.mlp.down_proj
+llm.model.layers.30.mlp.act_fn
+llm.model.layers.30.input_layernorm
+llm.model.layers.30.post_attention_layernorm
+llm.model.layers.31
+llm.model.layers.31.self_attn
+llm.model.layers.31.self_attn.q_proj
+llm.model.layers.31.self_attn.k_proj
+llm.model.layers.31.self_attn.v_proj
+llm.model.layers.31.self_attn.o_proj
+llm.model.layers.31.self_attn.rotary_emb
+llm.model.layers.31.mlp
+llm.model.layers.31.mlp.gate_proj
+llm.model.layers.31.mlp.up_proj
+llm.model.layers.31.mlp.down_proj
+llm.model.layers.31.mlp.act_fn
+llm.model.layers.31.input_layernorm
+llm.model.layers.31.post_attention_layernorm
+llm.model.layers.32
+llm.model.layers.32.self_attn
+llm.model.layers.32.self_attn.q_proj
+llm.model.layers.32.self_attn.k_proj
+llm.model.layers.32.self_attn.v_proj
+llm.model.layers.32.self_attn.o_proj
+llm.model.layers.32.self_attn.rotary_emb
+llm.model.layers.32.mlp
+llm.model.layers.32.mlp.gate_proj
+llm.model.layers.32.mlp.up_proj
+llm.model.layers.32.mlp.down_proj
+llm.model.layers.32.mlp.act_fn
+llm.model.layers.32.input_layernorm
+llm.model.layers.32.post_attention_layernorm
+llm.model.layers.33
+llm.model.layers.33.self_attn
+llm.model.layers.33.self_attn.q_proj
+llm.model.layers.33.self_attn.k_proj
+llm.model.layers.33.self_attn.v_proj
+llm.model.layers.33.self_attn.o_proj
+llm.model.layers.33.self_attn.rotary_emb
+llm.model.layers.33.mlp
+llm.model.layers.33.mlp.gate_proj
+llm.model.layers.33.mlp.up_proj
+llm.model.layers.33.mlp.down_proj
+llm.model.layers.33.mlp.act_fn
+llm.model.layers.33.input_layernorm
+llm.model.layers.33.post_attention_layernorm
+llm.model.layers.34
+llm.model.layers.34.self_attn
+llm.model.layers.34.self_attn.q_proj
+llm.model.layers.34.self_attn.k_proj
+llm.model.layers.34.self_attn.v_proj
+llm.model.layers.34.self_attn.o_proj
+llm.model.layers.34.self_attn.rotary_emb
+llm.model.layers.34.mlp
+llm.model.layers.34.mlp.gate_proj
+llm.model.layers.34.mlp.up_proj
+llm.model.layers.34.mlp.down_proj
+llm.model.layers.34.mlp.act_fn
+llm.model.layers.34.input_layernorm
+llm.model.layers.34.post_attention_layernorm
+llm.model.layers.35
+llm.model.layers.35.self_attn
+llm.model.layers.35.self_attn.q_proj
+llm.model.layers.35.self_attn.k_proj
+llm.model.layers.35.self_attn.v_proj
+llm.model.layers.35.self_attn.o_proj
+llm.model.layers.35.self_attn.rotary_emb
+llm.model.layers.35.mlp
+llm.model.layers.35.mlp.gate_proj
+llm.model.layers.35.mlp.up_proj
+llm.model.layers.35.mlp.down_proj
+llm.model.layers.35.mlp.act_fn
+llm.model.layers.35.input_layernorm
+llm.model.layers.35.post_attention_layernorm
+llm.model.layers.36
+llm.model.layers.36.self_attn
+llm.model.layers.36.self_attn.q_proj
+llm.model.layers.36.self_attn.k_proj
+llm.model.layers.36.self_attn.v_proj
+llm.model.layers.36.self_attn.o_proj
+llm.model.layers.36.self_attn.rotary_emb
+llm.model.layers.36.mlp
+llm.model.layers.36.mlp.gate_proj
+llm.model.layers.36.mlp.up_proj
+llm.model.layers.36.mlp.down_proj
+llm.model.layers.36.mlp.act_fn
+llm.model.layers.36.input_layernorm
+llm.model.layers.36.post_attention_layernorm
+llm.model.layers.37
+llm.model.layers.37.self_attn
+llm.model.layers.37.self_attn.q_proj
+llm.model.layers.37.self_attn.k_proj
+llm.model.layers.37.self_attn.v_proj
+llm.model.layers.37.self_attn.o_proj
+llm.model.layers.37.self_attn.rotary_emb
+llm.model.layers.37.mlp
+llm.model.layers.37.mlp.gate_proj
+llm.model.layers.37.mlp.up_proj
+llm.model.layers.37.mlp.down_proj
+llm.model.layers.37.mlp.act_fn
+llm.model.layers.37.input_layernorm
+llm.model.layers.37.post_attention_layernorm
+llm.model.layers.38
+llm.model.layers.38.self_attn
+llm.model.layers.38.self_attn.q_proj
+llm.model.layers.38.self_attn.k_proj
+llm.model.layers.38.self_attn.v_proj
+llm.model.layers.38.self_attn.o_proj
+llm.model.layers.38.self_attn.rotary_emb
+llm.model.layers.38.mlp
+llm.model.layers.38.mlp.gate_proj
+llm.model.layers.38.mlp.up_proj
+llm.model.layers.38.mlp.down_proj
+llm.model.layers.38.mlp.act_fn
+llm.model.layers.38.input_layernorm
+llm.model.layers.38.post_attention_layernorm
+llm.model.layers.39
+llm.model.layers.39.self_attn
+llm.model.layers.39.self_attn.q_proj
+llm.model.layers.39.self_attn.k_proj
+llm.model.layers.39.self_attn.v_proj
+llm.model.layers.39.self_attn.o_proj
+llm.model.layers.39.self_attn.rotary_emb
+llm.model.layers.39.mlp
+llm.model.layers.39.mlp.gate_proj
+llm.model.layers.39.mlp.up_proj
+llm.model.layers.39.mlp.down_proj
+llm.model.layers.39.mlp.act_fn
+llm.model.layers.39.input_layernorm
+llm.model.layers.39.post_attention_layernorm
+llm.model.norm
+llm.lm_head
+vpm
+vpm.patch_embed
+vpm.patch_embed.proj
+vpm.patch_embed.norm
+vpm.pos_drop
+vpm.patch_drop
+vpm.norm_pre
+vpm.blocks
+vpm.blocks.0
+vpm.blocks.0.norm1
+vpm.blocks.0.attn
+vpm.blocks.0.attn.qkv
+vpm.blocks.0.attn.q_norm
+vpm.blocks.0.attn.k_norm
+vpm.blocks.0.attn.attn_drop
+vpm.blocks.0.attn.proj
+vpm.blocks.0.attn.proj_drop
+vpm.blocks.0.ls1
+vpm.blocks.0.drop_path1
+vpm.blocks.0.norm2
+vpm.blocks.0.mlp
+vpm.blocks.0.mlp.fc1
+vpm.blocks.0.mlp.act
+vpm.blocks.0.mlp.drop1
+vpm.blocks.0.mlp.norm
+vpm.blocks.0.mlp.fc2
+vpm.blocks.0.mlp.drop2
+vpm.blocks.0.ls2
+vpm.blocks.0.drop_path2
+vpm.blocks.1
+vpm.blocks.1.norm1
+vpm.blocks.1.attn
+vpm.blocks.1.attn.qkv
+vpm.blocks.1.attn.q_norm
+vpm.blocks.1.attn.k_norm
+vpm.blocks.1.attn.attn_drop
+vpm.blocks.1.attn.proj
+vpm.blocks.1.attn.proj_drop
+vpm.blocks.1.ls1
+vpm.blocks.1.drop_path1
+vpm.blocks.1.norm2
+vpm.blocks.1.mlp
+vpm.blocks.1.mlp.fc1
+vpm.blocks.1.mlp.act
+vpm.blocks.1.mlp.drop1
+vpm.blocks.1.mlp.norm
+vpm.blocks.1.mlp.fc2
+vpm.blocks.1.mlp.drop2
+vpm.blocks.1.ls2
+vpm.blocks.1.drop_path2
+vpm.blocks.2
+vpm.blocks.2.norm1
+vpm.blocks.2.attn
+vpm.blocks.2.attn.qkv
+vpm.blocks.2.attn.q_norm
+vpm.blocks.2.attn.k_norm
+vpm.blocks.2.attn.attn_drop
+vpm.blocks.2.attn.proj
+vpm.blocks.2.attn.proj_drop
+vpm.blocks.2.ls1
+vpm.blocks.2.drop_path1
+vpm.blocks.2.norm2
+vpm.blocks.2.mlp
+vpm.blocks.2.mlp.fc1
+vpm.blocks.2.mlp.act
+vpm.blocks.2.mlp.drop1
+vpm.blocks.2.mlp.norm
+vpm.blocks.2.mlp.fc2
+vpm.blocks.2.mlp.drop2
+vpm.blocks.2.ls2
+vpm.blocks.2.drop_path2
+vpm.blocks.3
+vpm.blocks.3.norm1
+vpm.blocks.3.attn
+vpm.blocks.3.attn.qkv
+vpm.blocks.3.attn.q_norm
+vpm.blocks.3.attn.k_norm
+vpm.blocks.3.attn.attn_drop
+vpm.blocks.3.attn.proj
+vpm.blocks.3.attn.proj_drop
+vpm.blocks.3.ls1
+vpm.blocks.3.drop_path1
+vpm.blocks.3.norm2
+vpm.blocks.3.mlp
+vpm.blocks.3.mlp.fc1
+vpm.blocks.3.mlp.act
+vpm.blocks.3.mlp.drop1
+vpm.blocks.3.mlp.norm
+vpm.blocks.3.mlp.fc2
+vpm.blocks.3.mlp.drop2
+vpm.blocks.3.ls2
+vpm.blocks.3.drop_path2
+vpm.blocks.4
+vpm.blocks.4.norm1
+vpm.blocks.4.attn
+vpm.blocks.4.attn.qkv
+vpm.blocks.4.attn.q_norm
+vpm.blocks.4.attn.k_norm
+vpm.blocks.4.attn.attn_drop
+vpm.blocks.4.attn.proj
+vpm.blocks.4.attn.proj_drop
+vpm.blocks.4.ls1
+vpm.blocks.4.drop_path1
+vpm.blocks.4.norm2
+vpm.blocks.4.mlp
+vpm.blocks.4.mlp.fc1
+vpm.blocks.4.mlp.act
+vpm.blocks.4.mlp.drop1
+vpm.blocks.4.mlp.norm
+vpm.blocks.4.mlp.fc2
+vpm.blocks.4.mlp.drop2
+vpm.blocks.4.ls2
+vpm.blocks.4.drop_path2
+vpm.blocks.5
+vpm.blocks.5.norm1
+vpm.blocks.5.attn
+vpm.blocks.5.attn.qkv
+vpm.blocks.5.attn.q_norm
+vpm.blocks.5.attn.k_norm
+vpm.blocks.5.attn.attn_drop
+vpm.blocks.5.attn.proj
+vpm.blocks.5.attn.proj_drop
+vpm.blocks.5.ls1
+vpm.blocks.5.drop_path1
+vpm.blocks.5.norm2
+vpm.blocks.5.mlp
+vpm.blocks.5.mlp.fc1
+vpm.blocks.5.mlp.act
+vpm.blocks.5.mlp.drop1
+vpm.blocks.5.mlp.norm
+vpm.blocks.5.mlp.fc2
+vpm.blocks.5.mlp.drop2
+vpm.blocks.5.ls2
+vpm.blocks.5.drop_path2
+vpm.blocks.6
+vpm.blocks.6.norm1
+vpm.blocks.6.attn
+vpm.blocks.6.attn.qkv
+vpm.blocks.6.attn.q_norm
+vpm.blocks.6.attn.k_norm
+vpm.blocks.6.attn.attn_drop
+vpm.blocks.6.attn.proj
+vpm.blocks.6.attn.proj_drop
+vpm.blocks.6.ls1
+vpm.blocks.6.drop_path1
+vpm.blocks.6.norm2
+vpm.blocks.6.mlp
+vpm.blocks.6.mlp.fc1
+vpm.blocks.6.mlp.act
+vpm.blocks.6.mlp.drop1
+vpm.blocks.6.mlp.norm
+vpm.blocks.6.mlp.fc2
+vpm.blocks.6.mlp.drop2
+vpm.blocks.6.ls2
+vpm.blocks.6.drop_path2
+vpm.blocks.7
+vpm.blocks.7.norm1
+vpm.blocks.7.attn
+vpm.blocks.7.attn.qkv
+vpm.blocks.7.attn.q_norm
+vpm.blocks.7.attn.k_norm
+vpm.blocks.7.attn.attn_drop
+vpm.blocks.7.attn.proj
+vpm.blocks.7.attn.proj_drop
+vpm.blocks.7.ls1
+vpm.blocks.7.drop_path1
+vpm.blocks.7.norm2
+vpm.blocks.7.mlp
+vpm.blocks.7.mlp.fc1
+vpm.blocks.7.mlp.act
+vpm.blocks.7.mlp.drop1
+vpm.blocks.7.mlp.norm
+vpm.blocks.7.mlp.fc2
+vpm.blocks.7.mlp.drop2
+vpm.blocks.7.ls2
+vpm.blocks.7.drop_path2
+vpm.blocks.8
+vpm.blocks.8.norm1
+vpm.blocks.8.attn
+vpm.blocks.8.attn.qkv
+vpm.blocks.8.attn.q_norm
+vpm.blocks.8.attn.k_norm
+vpm.blocks.8.attn.attn_drop
+vpm.blocks.8.attn.proj
+vpm.blocks.8.attn.proj_drop
+vpm.blocks.8.ls1
+vpm.blocks.8.drop_path1
+vpm.blocks.8.norm2
+vpm.blocks.8.mlp
+vpm.blocks.8.mlp.fc1
+vpm.blocks.8.mlp.act
+vpm.blocks.8.mlp.drop1
+vpm.blocks.8.mlp.norm
+vpm.blocks.8.mlp.fc2
+vpm.blocks.8.mlp.drop2
+vpm.blocks.8.ls2
+vpm.blocks.8.drop_path2
+vpm.blocks.9
+vpm.blocks.9.norm1
+vpm.blocks.9.attn
+vpm.blocks.9.attn.qkv
+vpm.blocks.9.attn.q_norm
+vpm.blocks.9.attn.k_norm
+vpm.blocks.9.attn.attn_drop
+vpm.blocks.9.attn.proj
+vpm.blocks.9.attn.proj_drop
+vpm.blocks.9.ls1
+vpm.blocks.9.drop_path1
+vpm.blocks.9.norm2
+vpm.blocks.9.mlp
+vpm.blocks.9.mlp.fc1
+vpm.blocks.9.mlp.act
+vpm.blocks.9.mlp.drop1
+vpm.blocks.9.mlp.norm
+vpm.blocks.9.mlp.fc2
+vpm.blocks.9.mlp.drop2
+vpm.blocks.9.ls2
+vpm.blocks.9.drop_path2
+vpm.blocks.10
+vpm.blocks.10.norm1
+vpm.blocks.10.attn
+vpm.blocks.10.attn.qkv
+vpm.blocks.10.attn.q_norm
+vpm.blocks.10.attn.k_norm
+vpm.blocks.10.attn.attn_drop
+vpm.blocks.10.attn.proj
+vpm.blocks.10.attn.proj_drop
+vpm.blocks.10.ls1
+vpm.blocks.10.drop_path1
+vpm.blocks.10.norm2
+vpm.blocks.10.mlp
+vpm.blocks.10.mlp.fc1
+vpm.blocks.10.mlp.act
+vpm.blocks.10.mlp.drop1
+vpm.blocks.10.mlp.norm
+vpm.blocks.10.mlp.fc2
+vpm.blocks.10.mlp.drop2
+vpm.blocks.10.ls2
+vpm.blocks.10.drop_path2
+vpm.blocks.11
+vpm.blocks.11.norm1
+vpm.blocks.11.attn
+vpm.blocks.11.attn.qkv
+vpm.blocks.11.attn.q_norm
+vpm.blocks.11.attn.k_norm
+vpm.blocks.11.attn.attn_drop
+vpm.blocks.11.attn.proj
+vpm.blocks.11.attn.proj_drop
+vpm.blocks.11.ls1
+vpm.blocks.11.drop_path1
+vpm.blocks.11.norm2
+vpm.blocks.11.mlp
+vpm.blocks.11.mlp.fc1
+vpm.blocks.11.mlp.act
+vpm.blocks.11.mlp.drop1
+vpm.blocks.11.mlp.norm
+vpm.blocks.11.mlp.fc2
+vpm.blocks.11.mlp.drop2
+vpm.blocks.11.ls2
+vpm.blocks.11.drop_path2
+vpm.blocks.12
+vpm.blocks.12.norm1
+vpm.blocks.12.attn
+vpm.blocks.12.attn.qkv
+vpm.blocks.12.attn.q_norm
+vpm.blocks.12.attn.k_norm
+vpm.blocks.12.attn.attn_drop
+vpm.blocks.12.attn.proj
+vpm.blocks.12.attn.proj_drop
+vpm.blocks.12.ls1
+vpm.blocks.12.drop_path1
+vpm.blocks.12.norm2
+vpm.blocks.12.mlp
+vpm.blocks.12.mlp.fc1
+vpm.blocks.12.mlp.act
+vpm.blocks.12.mlp.drop1
+vpm.blocks.12.mlp.norm
+vpm.blocks.12.mlp.fc2
+vpm.blocks.12.mlp.drop2
+vpm.blocks.12.ls2
+vpm.blocks.12.drop_path2
+vpm.blocks.13
+vpm.blocks.13.norm1
+vpm.blocks.13.attn
+vpm.blocks.13.attn.qkv
+vpm.blocks.13.attn.q_norm
+vpm.blocks.13.attn.k_norm
+vpm.blocks.13.attn.attn_drop
+vpm.blocks.13.attn.proj
+vpm.blocks.13.attn.proj_drop
+vpm.blocks.13.ls1
+vpm.blocks.13.drop_path1
+vpm.blocks.13.norm2
+vpm.blocks.13.mlp
+vpm.blocks.13.mlp.fc1
+vpm.blocks.13.mlp.act
+vpm.blocks.13.mlp.drop1
+vpm.blocks.13.mlp.norm
+vpm.blocks.13.mlp.fc2
+vpm.blocks.13.mlp.drop2
+vpm.blocks.13.ls2
+vpm.blocks.13.drop_path2
+vpm.blocks.14
+vpm.blocks.14.norm1
+vpm.blocks.14.attn
+vpm.blocks.14.attn.qkv
+vpm.blocks.14.attn.q_norm
+vpm.blocks.14.attn.k_norm
+vpm.blocks.14.attn.attn_drop
+vpm.blocks.14.attn.proj
+vpm.blocks.14.attn.proj_drop
+vpm.blocks.14.ls1
+vpm.blocks.14.drop_path1
+vpm.blocks.14.norm2
+vpm.blocks.14.mlp
+vpm.blocks.14.mlp.fc1
+vpm.blocks.14.mlp.act
+vpm.blocks.14.mlp.drop1
+vpm.blocks.14.mlp.norm
+vpm.blocks.14.mlp.fc2
+vpm.blocks.14.mlp.drop2
+vpm.blocks.14.ls2
+vpm.blocks.14.drop_path2
+vpm.blocks.15
+vpm.blocks.15.norm1
+vpm.blocks.15.attn
+vpm.blocks.15.attn.qkv
+vpm.blocks.15.attn.q_norm
+vpm.blocks.15.attn.k_norm
+vpm.blocks.15.attn.attn_drop
+vpm.blocks.15.attn.proj
+vpm.blocks.15.attn.proj_drop
+vpm.blocks.15.ls1
+vpm.blocks.15.drop_path1
+vpm.blocks.15.norm2
+vpm.blocks.15.mlp
+vpm.blocks.15.mlp.fc1
+vpm.blocks.15.mlp.act
+vpm.blocks.15.mlp.drop1
+vpm.blocks.15.mlp.norm
+vpm.blocks.15.mlp.fc2
+vpm.blocks.15.mlp.drop2
+vpm.blocks.15.ls2
+vpm.blocks.15.drop_path2
+vpm.blocks.16
+vpm.blocks.16.norm1
+vpm.blocks.16.attn
+vpm.blocks.16.attn.qkv
+vpm.blocks.16.attn.q_norm
+vpm.blocks.16.attn.k_norm
+vpm.blocks.16.attn.attn_drop
+vpm.blocks.16.attn.proj
+vpm.blocks.16.attn.proj_drop
+vpm.blocks.16.ls1
+vpm.blocks.16.drop_path1
+vpm.blocks.16.norm2
+vpm.blocks.16.mlp
+vpm.blocks.16.mlp.fc1
+vpm.blocks.16.mlp.act
+vpm.blocks.16.mlp.drop1
+vpm.blocks.16.mlp.norm
+vpm.blocks.16.mlp.fc2
+vpm.blocks.16.mlp.drop2
+vpm.blocks.16.ls2
+vpm.blocks.16.drop_path2
+vpm.blocks.17
+vpm.blocks.17.norm1
+vpm.blocks.17.attn
+vpm.blocks.17.attn.qkv
+vpm.blocks.17.attn.q_norm
+vpm.blocks.17.attn.k_norm
+vpm.blocks.17.attn.attn_drop
+vpm.blocks.17.attn.proj
+vpm.blocks.17.attn.proj_drop
+vpm.blocks.17.ls1
+vpm.blocks.17.drop_path1
+vpm.blocks.17.norm2
+vpm.blocks.17.mlp
+vpm.blocks.17.mlp.fc1
+vpm.blocks.17.mlp.act
+vpm.blocks.17.mlp.drop1
+vpm.blocks.17.mlp.norm
+vpm.blocks.17.mlp.fc2
+vpm.blocks.17.mlp.drop2
+vpm.blocks.17.ls2
+vpm.blocks.17.drop_path2
+vpm.blocks.18
+vpm.blocks.18.norm1
+vpm.blocks.18.attn
+vpm.blocks.18.attn.qkv
+vpm.blocks.18.attn.q_norm
+vpm.blocks.18.attn.k_norm
+vpm.blocks.18.attn.attn_drop
+vpm.blocks.18.attn.proj
+vpm.blocks.18.attn.proj_drop
+vpm.blocks.18.ls1
+vpm.blocks.18.drop_path1
+vpm.blocks.18.norm2
+vpm.blocks.18.mlp
+vpm.blocks.18.mlp.fc1
+vpm.blocks.18.mlp.act
+vpm.blocks.18.mlp.drop1
+vpm.blocks.18.mlp.norm
+vpm.blocks.18.mlp.fc2
+vpm.blocks.18.mlp.drop2
+vpm.blocks.18.ls2
+vpm.blocks.18.drop_path2
+vpm.blocks.19
+vpm.blocks.19.norm1
+vpm.blocks.19.attn
+vpm.blocks.19.attn.qkv
+vpm.blocks.19.attn.q_norm
+vpm.blocks.19.attn.k_norm
+vpm.blocks.19.attn.attn_drop
+vpm.blocks.19.attn.proj
+vpm.blocks.19.attn.proj_drop
+vpm.blocks.19.ls1
+vpm.blocks.19.drop_path1
+vpm.blocks.19.norm2
+vpm.blocks.19.mlp
+vpm.blocks.19.mlp.fc1
+vpm.blocks.19.mlp.act
+vpm.blocks.19.mlp.drop1
+vpm.blocks.19.mlp.norm
+vpm.blocks.19.mlp.fc2
+vpm.blocks.19.mlp.drop2
+vpm.blocks.19.ls2
+vpm.blocks.19.drop_path2
+vpm.blocks.20
+vpm.blocks.20.norm1
+vpm.blocks.20.attn
+vpm.blocks.20.attn.qkv
+vpm.blocks.20.attn.q_norm
+vpm.blocks.20.attn.k_norm
+vpm.blocks.20.attn.attn_drop
+vpm.blocks.20.attn.proj
+vpm.blocks.20.attn.proj_drop
+vpm.blocks.20.ls1
+vpm.blocks.20.drop_path1
+vpm.blocks.20.norm2
+vpm.blocks.20.mlp
+vpm.blocks.20.mlp.fc1
+vpm.blocks.20.mlp.act
+vpm.blocks.20.mlp.drop1
+vpm.blocks.20.mlp.norm
+vpm.blocks.20.mlp.fc2
+vpm.blocks.20.mlp.drop2
+vpm.blocks.20.ls2
+vpm.blocks.20.drop_path2
+vpm.blocks.21
+vpm.blocks.21.norm1
+vpm.blocks.21.attn
+vpm.blocks.21.attn.qkv
+vpm.blocks.21.attn.q_norm
+vpm.blocks.21.attn.k_norm
+vpm.blocks.21.attn.attn_drop
+vpm.blocks.21.attn.proj
+vpm.blocks.21.attn.proj_drop
+vpm.blocks.21.ls1
+vpm.blocks.21.drop_path1
+vpm.blocks.21.norm2
+vpm.blocks.21.mlp
+vpm.blocks.21.mlp.fc1
+vpm.blocks.21.mlp.act
+vpm.blocks.21.mlp.drop1
+vpm.blocks.21.mlp.norm
+vpm.blocks.21.mlp.fc2
+vpm.blocks.21.mlp.drop2
+vpm.blocks.21.ls2
+vpm.blocks.21.drop_path2
+vpm.blocks.22
+vpm.blocks.22.norm1
+vpm.blocks.22.attn
+vpm.blocks.22.attn.qkv
+vpm.blocks.22.attn.q_norm
+vpm.blocks.22.attn.k_norm
+vpm.blocks.22.attn.attn_drop
+vpm.blocks.22.attn.proj
+vpm.blocks.22.attn.proj_drop
+vpm.blocks.22.ls1
+vpm.blocks.22.drop_path1
+vpm.blocks.22.norm2
+vpm.blocks.22.mlp
+vpm.blocks.22.mlp.fc1
+vpm.blocks.22.mlp.act
+vpm.blocks.22.mlp.drop1
+vpm.blocks.22.mlp.norm
+vpm.blocks.22.mlp.fc2
+vpm.blocks.22.mlp.drop2
+vpm.blocks.22.ls2
+vpm.blocks.22.drop_path2
+vpm.blocks.23
+vpm.blocks.23.norm1
+vpm.blocks.23.attn
+vpm.blocks.23.attn.qkv
+vpm.blocks.23.attn.q_norm
+vpm.blocks.23.attn.k_norm
+vpm.blocks.23.attn.attn_drop
+vpm.blocks.23.attn.proj
+vpm.blocks.23.attn.proj_drop
+vpm.blocks.23.ls1
+vpm.blocks.23.drop_path1
+vpm.blocks.23.norm2
+vpm.blocks.23.mlp
+vpm.blocks.23.mlp.fc1
+vpm.blocks.23.mlp.act
+vpm.blocks.23.mlp.drop1
+vpm.blocks.23.mlp.norm
+vpm.blocks.23.mlp.fc2
+vpm.blocks.23.mlp.drop2
+vpm.blocks.23.ls2
+vpm.blocks.23.drop_path2
+vpm.blocks.24
+vpm.blocks.24.norm1
+vpm.blocks.24.attn
+vpm.blocks.24.attn.qkv
+vpm.blocks.24.attn.q_norm
+vpm.blocks.24.attn.k_norm
+vpm.blocks.24.attn.attn_drop
+vpm.blocks.24.attn.proj
+vpm.blocks.24.attn.proj_drop
+vpm.blocks.24.ls1
+vpm.blocks.24.drop_path1
+vpm.blocks.24.norm2
+vpm.blocks.24.mlp
+vpm.blocks.24.mlp.fc1
+vpm.blocks.24.mlp.act
+vpm.blocks.24.mlp.drop1
+vpm.blocks.24.mlp.norm
+vpm.blocks.24.mlp.fc2
+vpm.blocks.24.mlp.drop2
+vpm.blocks.24.ls2
+vpm.blocks.24.drop_path2
+vpm.blocks.25
+vpm.blocks.25.norm1
+vpm.blocks.25.attn
+vpm.blocks.25.attn.qkv
+vpm.blocks.25.attn.q_norm
+vpm.blocks.25.attn.k_norm
+vpm.blocks.25.attn.attn_drop
+vpm.blocks.25.attn.proj
+vpm.blocks.25.attn.proj_drop
+vpm.blocks.25.ls1
+vpm.blocks.25.drop_path1
+vpm.blocks.25.norm2
+vpm.blocks.25.mlp
+vpm.blocks.25.mlp.fc1
+vpm.blocks.25.mlp.act
+vpm.blocks.25.mlp.drop1
+vpm.blocks.25.mlp.norm
+vpm.blocks.25.mlp.fc2
+vpm.blocks.25.mlp.drop2
+vpm.blocks.25.ls2
+vpm.blocks.25.drop_path2
+vpm.norm
+vpm.attn_pool
+vpm.fc_norm
+vpm.head_drop
+vpm.head
+resampler
+resampler.kv_proj
+resampler.attn
+resampler.attn.out_proj
+resampler.ln_q
+resampler.ln_kv
+resampler.ln_post
diff --git a/project_root.py b/project_root.py
new file mode 100644
index 0000000000000000000000000000000000000000..579e4bd8d03b4953242d3942ea033acc13f15831
--- /dev/null
+++ b/project_root.py
@@ -0,0 +1,4 @@
+"""Utility for project root."""
+from pathlib import Path
+
+PROJECT_ROOT = Path(__file__).resolve().parent
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6783e0a4d451558c88f8bfe1d0f7ec6bbb145b11
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,215 @@
+absl-py==2.3.1
+accelerate==1.10.1
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.15
+aiosignal==1.4.0
+annotated-types==0.7.0
+anyio==4.10.0
+argon2-cffi==25.1.0
+argon2-cffi-bindings==25.1.0
+arrow==1.3.0
+asttokens==3.0.0
+astunparse==1.6.3
+async-lru==2.0.5
+async-timeout==5.0.1
+attrs==25.3.0
+babel==2.17.0
+beautifulsoup4==4.13.4
+bleach==6.2.0
+Brotli==1.1.0
+certifi==2024.12.14
+cffi==1.17.1
+cfgv==3.4.0
+charset-normalizer==3.4.1
+click==8.3.0
+comm==0.2.3
+contourpy==1.3.2
+cycler==0.12.1
+datasets==3.6.0
+debugpy==1.8.16
+decorator==5.2.1
+defusedxml==0.7.1
+dill==0.3.8
+distlib==0.3.9
+einops==0.8.1
+exceptiongroup==1.3.0
+executing==2.2.0
+fastapi==0.117.1
+fastjsonschema==2.21.2
+ffmpy==0.6.1
+filelock==3.17.0
+flatbuffers==25.9.23
+fonttools==4.59.1
+fqdn==1.5.1
+frozenlist==1.7.0
+fsspec==2024.12.0
+gast==0.6.0
+google-pasta==0.2.0
+gradio==5.47.2
+gradio_client==1.13.3
+groovy==0.1.2
+grpcio==1.75.1
+h11==0.16.0
+h5py==3.14.0
+hf-xet==1.1.10
+httpcore==1.0.9
+httpx==0.28.1
+huggingface-hub==0.35.1
+identify==2.6.6
+idna==3.10
+ipykernel==6.30.1
+ipython==8.37.0
+ipywidgets==8.1.7
+isoduration==20.11.0
+jedi==0.19.2
+Jinja2==3.1.5
+joblib==1.5.2
+json5==0.12.1
+jsonpointer==3.0.0
+jsonschema==4.25.1
+jsonschema-specifications==2025.4.1
+jupyter==1.1.1
+jupyter-console==6.6.3
+jupyter-events==0.12.0
+jupyter-lsp==2.2.6
+jupyter_client==8.6.3
+jupyter_core==5.8.1
+jupyter_server==2.16.0
+jupyter_server_terminals==0.5.3
+jupyterlab==4.4.6
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+jupyterlab_widgets==3.0.15
+keras==3.11.3
+kiwisolver==1.4.9
+lark==1.2.2
+libclang==18.1.1
+Markdown==3.9
+markdown-it-py==4.0.0
+MarkupSafe==3.0.2
+matplotlib==3.10.5
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mistune==3.1.3
+ml_dtypes==0.5.3
+mpmath==1.3.0
+multidict==6.6.4
+multiprocess==0.70.16
+namex==0.1.0
+nbclient==0.10.2
+nbconvert==7.16.6
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.4.2
+nodeenv==1.9.1
+notebook==7.4.5
+notebook_shim==0.2.4
+numpy==2.2.2
+nvidia-cublas-cu12==12.8.4.1
+nvidia-cuda-cupti-cu12==12.8.90
+nvidia-cuda-nvrtc-cu12==12.8.93
+nvidia-cuda-runtime-cu12==12.8.90
+nvidia-cudnn-cu12==9.10.2.21
+nvidia-cufft-cu12==11.3.3.83
+nvidia-cufile-cu12==1.13.1.3
+nvidia-curand-cu12==10.3.9.90
+nvidia-cusolver-cu12==11.7.3.90
+nvidia-cusparse-cu12==12.5.8.93
+nvidia-cusparselt-cu12==0.7.1
+nvidia-nccl-cu12==2.27.3
+nvidia-nvjitlink-cu12==12.8.93
+nvidia-nvtx-cu12==12.8.90
+opencv-python==4.12.0.88
+opt_einsum==3.4.0
+optree==0.17.0
+orjson==3.11.3
+overrides==7.7.0
+packaging==24.2
+pandas==2.3.1
+pandocfilters==1.5.1
+parso==0.8.4
+pexpect==4.9.0
+pillow==11.1.0
+platformdirs==4.3.6
+pre_commit==4.1.0
+prometheus_client==0.22.1
+prompt_toolkit==3.0.51
+propcache==0.3.2
+protobuf==6.32.1
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==21.0.0
+pycparser==2.22
+pydantic==2.11.9
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+pyparsing==3.2.3
+python-dateutil==2.9.0.post0
+python-json-logger==3.3.0
+python-multipart==0.0.20
+pytz==2025.2
+PyYAML==6.0.2
+pyzmq==27.0.1
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rfc3987-syntax==1.1.0
+rich==14.1.0
+rpds-py==0.27.0
+ruff==0.13.2
+safehttpx==0.1.6
+safetensors==0.5.3
+scikit-learn==1.7.1
+scipy==1.15.3
+seaborn==0.13.2
+semantic-version==2.10.0
+Send2Trash==1.8.3
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+soupsieve==2.7
+stack-data==0.6.3
+starlette==0.48.0
+sympy==1.14.0
+tensorboard==2.20.0
+tensorboard-data-server==0.7.2
+tensorflow==2.20.0
+termcolor==3.1.0
+terminado==0.18.1
+threadpoolctl==3.6.0
+tinycss2==1.4.0
+tokenizers==0.21.0
+tomli==2.2.1
+tomlkit==0.13.3
+torch==2.8.0
+torchvision==0.23.0
+tornado==6.5.2
+tqdm==4.67.1
+traitlets==5.14.3
+transformers==4.51.3
+triton==3.4.0
+typer==0.19.2
+types-python-dateutil==2.9.0.20250809
+typing-inspection==0.4.1
+typing_extensions==4.12.2
+tzdata==2025.2
+uri-template==1.3.0
+urllib3==2.3.0
+uvicorn==0.37.0
+virtualenv==20.29.1
+wcwidth==0.2.13
+webcolors==24.11.1
+webencodings==0.5.1
+websocket-client==1.8.0
+websockets==15.0.1
+Werkzeug==3.1.3
+wget==3.2
+widgetsnbextension==4.0.14
+wrapt==1.17.3
+xxhash==3.5.0
+yarl==1.20.1
diff --git a/src/__pycache__/main.cpython-310.pyc b/src/__pycache__/main.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b5470c174c7c4c364130406d00e0672ef2d74328
Binary files /dev/null and b/src/__pycache__/main.cpython-310.pyc differ
diff --git a/src/concepts/__pycache__/pca.cpython-310.pyc b/src/concepts/__pycache__/pca.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..38da474276cfce204b4c2cbebc5133a0e8fc302e
Binary files /dev/null and b/src/concepts/__pycache__/pca.cpython-310.pyc differ
diff --git a/src/concepts/__pycache__/pca.cpython-38.pyc b/src/concepts/__pycache__/pca.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f3521e6de2fcd7bdb744d4feb342d7e2cbdf9bfc
Binary files /dev/null and b/src/concepts/__pycache__/pca.cpython-38.pyc differ
diff --git a/src/concepts/pca.py b/src/concepts/pca.py
new file mode 100644
index 0000000000000000000000000000000000000000..a794264e0af1c5db6f49bc950d510fa42f505fe6
--- /dev/null
+++ b/src/concepts/pca.py
@@ -0,0 +1,788 @@
+"""VLM concept interpretability analysis with PCA sensitivity plots."""
+
+from __future__ import annotations
+
+import io
+import os
+import re
+import sqlite3
+from collections import defaultdict
+from typing import Any, Optional
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from sklearn.decomposition import PCA
+
+
+def cosine_similarity_numpy(a: np.ndarray, b: np.ndarray) -> float:
+ """Calculate cosine similarity between two vectors using numpy with robust error handling.
+
+ Args:
+ a: First vector
+ b: Second vector
+
+ Returns:
+        Cosine similarity score in the range [-1, 1]; 0.0 is returned for degenerate inputs
+ """
+ # Check for NaN or infinite values
+ if not (np.isfinite(a).all() and np.isfinite(b).all()):
+ print('Warning: NaN or infinite values detected in tensors')
+ return 0.0
+
+ norm_a = np.linalg.norm(a)
+ norm_b = np.linalg.norm(b)
+
+ # Handle zero vectors or invalid norms
+ if norm_a == 0 or norm_b == 0 or not (np.isfinite(norm_a) and np.isfinite(norm_b)):
+ return 0.0
+
+ dot_product = np.dot(a, b)
+
+ # Check if dot product is valid
+ if not np.isfinite(dot_product):
+ print('Warning: Invalid dot product')
+ return 0.0
+
+ return dot_product / (norm_a * norm_b)
+
+
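+# Illustrative sanity check (not from the original source): cosine_similarity_numpy
+# returns a value in [-1, 1], with 0.0 as the fallback for degenerate inputs, e.g.
+#   cosine_similarity_numpy(np.array([1.0, 0.0]), np.array([1.0, 1.0]))  # ~0.7071
+#   cosine_similarity_numpy(np.zeros(4), np.ones(4))                     # 0.0
+
+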
+def extract_tensor_from_object(tensor_obj: Any) -> Optional[torch.Tensor]:
+ """Return a single 1D embedding vector from a deserialized object.
+
+ Prefer pooled outputs; if we get sequence/token grids, mean-pool.
+
+ Args:
+ tensor_obj: Deserialized tensor object from model output
+
+ Returns:
+ Single 1D torch tensor or None if extraction fails
+ """
+ def _to_1d(t: Any) -> Optional[torch.Tensor]:
+ if not torch.is_tensor(t):
+ return None
+ if t.dim() == 3:
+ t = t[0] # assume batch size 1
+ t = t.mean(dim=0) # mean over seq
+ elif t.dim() == 2:
+ t = t.mean(dim=0) # mean over seq
+ elif t.dim() == 1:
+ pass
+ else:
+ t = t.flatten()
+ return t
+
+ if hasattr(tensor_obj, 'pooler_output'):
+ t = _to_1d(tensor_obj.pooler_output)
+ if t is not None:
+ return t
+ if hasattr(tensor_obj, 'last_hidden_state'):
+ t = _to_1d(tensor_obj.last_hidden_state)
+ if t is not None:
+ return t
+ if hasattr(tensor_obj, 'hidden_states'):
+ hs = tensor_obj.hidden_states
+ if isinstance(hs, (list, tuple)) and len(hs) > 0:
+ t = _to_1d(hs[-1]) # last layer
+ if t is not None:
+ return t
+ else:
+ t = _to_1d(hs)
+ if t is not None:
+ return t
+ if torch.is_tensor(tensor_obj):
+ return _to_1d(tensor_obj)
+
+ for attr_name in dir(tensor_obj):
+ if attr_name.startswith('_'):
+ continue
+ try:
+ attr_value = getattr(tensor_obj, attr_name)
+ if torch.is_tensor(attr_value):
+ t = _to_1d(attr_value)
+ if t is not None:
+                    print(f"Using attribute '{attr_name}' from {type(tensor_obj).__name__}")
+ return t
+ except Exception:
+ continue
+
+ print(f'Could not find tensor data in {type(tensor_obj).__name__}')
+ return None
+
+
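+# Shape-handling sketch (illustrative shapes, not from the original source): a
+# batched hidden state of shape (1, num_tokens, hidden_dim) is reduced by
+# dropping the batch dimension and mean-pooling over tokens, so e.g.
+# torch.randn(1, 196, 768) becomes a single 768-dimensional embedding vector.
+
+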
+def load_tensors_by_layer(db_path: str, device: str = 'cpu') -> dict[str, list[tuple[np.ndarray, Any, int, str]]]:
+ """Load all tensors from a database, grouped by layer.
+
+ Args:
+ db_path: Path to the SQLite database
+ device: PyTorch device for tensor loading
+
+ Returns:
+ Dictionary mapping layer names to lists of (tensor_np, label, row_id, image_filename) tuples
+ """
+ connection = sqlite3.connect(db_path)
+ cursor = connection.cursor()
+
+ # First check what columns are available
+ cursor.execute('PRAGMA table_info(tensors)')
+ columns = [column[1] for column in cursor.fetchall()]
+ print(f'Available columns in {db_path}: {columns}')
+
+ query = 'SELECT rowid, layer, tensor, label, image_path FROM tensors'
+ cursor.execute(query)
+ results = cursor.fetchall()
+ connection.close()
+
+ layers_dict = defaultdict(list)
+
+ for result in results:
+ row_id, layer, tensor_bytes, label, image_filename = result
+
+ try:
+ # Load tensor object
+ tensor_obj = torch.load(io.BytesIO(tensor_bytes), map_location=device, weights_only=False)
+
+ # Extract actual tensor from object
+ tensor = extract_tensor_from_object(tensor_obj)
+ if tensor is None:
+ print(f'Warning: Could not extract tensor from row {row_id} in layer {layer}')
+ continue
+
+ # Convert to numpy for analysis
+ if tensor.requires_grad:
+ tensor_np = tensor.detach().cpu().numpy().flatten()
+ else:
+ tensor_np = tensor.cpu().numpy().flatten()
+
+ layers_dict[layer].append((tensor_np, label, row_id, image_filename))
+
+ except Exception as e:
+ print(f'Warning: Could not deserialize tensor at row {row_id}, layer {layer}: {e}')
+ continue
+
+ return dict(layers_dict)
+
+
+def extract_concept_from_filename(image_filename: str) -> Optional[str]:
+ """Extract concept name from image filename.
+
+ Args:
+ image_filename: e.g., './data/concepts/images/blue_01.jpg'
+
+ Returns:
+ concept name, e.g., 'blue'
+ """
+ if not image_filename:
+ return None
+
+ # Get the base filename without path and extension
+ base_name = os.path.splitext(os.path.basename(image_filename))[0]
+
+ # Extract concept name (everything before the last underscore and number)
+ # e.g., 'blue_01' -> 'blue'
+ match = re.match(r'^(.+)_\d+$', base_name)
+ if match:
+ return match.group(1)
+ else:
+ # If no underscore pattern, use the whole base name
+ return base_name
+
+
+def group_tensors_by_concept(layer_tensors: list[tuple[np.ndarray, Any, int, str]]) -> dict[str, list[tuple[np.ndarray, Any, int, str]]]:
+ """Group tensors by concept based on their image filenames.
+
+ Args:
+ layer_tensors: List of (tensor_np, label, row_id, image_filename) tuples
+
+ Returns:
+ Dictionary mapping concept names to lists of tensor data
+ """
+ concept_groups = defaultdict(list)
+
+ for tensor_data in layer_tensors:
+ tensor_np, label, row_id, image_filename = tensor_data
+ concept = extract_concept_from_filename(image_filename)
+
+ if concept:
+ concept_groups[concept].append(tensor_data)
+ else:
+ print(f'Warning: Could not extract concept from filename: {image_filename}')
+
+ return dict(concept_groups)
+
+
+def apply_pca_to_layer(
+ target_tensors: list[tuple[np.ndarray, Any, int, str]],
+ concept_tensors: list[tuple[np.ndarray, Any, int, str]],
+ n_components: Optional[int] = None
+) -> tuple[list[tuple[np.ndarray, Any, int, str]], list[tuple[np.ndarray, Any, int, str]], Optional[PCA]]:
+ """Apply PCA dimensionality reduction to tensors from the same layer.
+
+ PCA is fit on CONCEPT TENSORS ONLY to avoid target leakage.
+
+ Args:
+ target_tensors: List of target tensor data
+ concept_tensors: List of concept tensor data
+ n_components: Number of PCA components (None to skip PCA)
+
+ Returns:
+ Tuple of (transformed_target_tensors, transformed_concept_tensors, pca_model)
+ """
+ if n_components is None:
+ return target_tensors, concept_tensors, None
+
+ print(f'Applying PCA with {n_components} components...')
+
+ concept_arrays = [data[0] for data in concept_tensors]
+
+ if len(concept_arrays) == 0:
+ print('Warning: no concept tensors to fit PCA; skipping PCA.')
+ return target_tensors, concept_tensors, None
+
+ pca = PCA(n_components=n_components, random_state=42)
+ pca.fit(np.vstack(concept_arrays))
+
+ print(f'PCA explained variance ratio: {pca.explained_variance_ratio_}')
+ print(f'Total explained variance: {pca.explained_variance_ratio_.sum():.4f}')
+
+ transformed_target_tensors = []
+ for tensor_np, label, row_id, image_filename in target_tensors:
+ transformed = pca.transform(tensor_np.reshape(1, -1)).flatten()
+ transformed_target_tensors.append((transformed, label, row_id, image_filename))
+
+ transformed_concept_tensors = []
+ for tensor_np, label, row_id, image_filename in concept_tensors:
+ transformed = pca.transform(tensor_np.reshape(1, -1)).flatten()
+ transformed_concept_tensors.append((transformed, label, row_id, image_filename))
+
+ return transformed_target_tensors, transformed_concept_tensors, pca
+
+
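+# Intended call pattern (hypothetical argument values): PCA is fit only on the
+# concept embeddings and then used to project both sets, e.g.
+#   targets_r, concepts_r, pca = apply_pca_to_layer(target_rows, concept_rows,
+#                                                   n_components=32)
+# where target_rows / concept_rows are the (vector, label, rowid, path) tuples
+# produced by load_tensors_by_layer(); the names and 32 are placeholders.
+
+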
+def analyze_target_vs_concepts(
+ target_tensors: list[tuple[np.ndarray, Any, int, str]],
+ concept_tensors: list[tuple[np.ndarray, Any, int, str]],
+ layer_name: str
+) -> list[dict[str, Any]]:
+ """Analyze similarity between target images and concept groups.
+
+ Adds centroid-based metrics while preserving existing stats.
+
+ Args:
+ target_tensors: List of target tensor data
+ concept_tensors: List of concept tensor data
+ layer_name: Name of the current layer
+
+ Returns:
+ List of analysis results for each target image
+ """
+ concept_groups = group_tensors_by_concept(concept_tensors)
+ print(f'Found {len(concept_groups)} concepts: {list(concept_groups.keys())}')
+ for concept, tensors in concept_groups.items():
+ print(f' {concept}: {len(tensors)} images')
+
+ # Precompute concept centroids
+ concept_centroids = {}
+ for concept_name, tensor_list in concept_groups.items():
+ vecs = [t[0] for t in tensor_list]
+ if len(vecs) > 0:
+ concept_centroids[concept_name] = np.mean(np.vstack(vecs), axis=0)
+ else:
+ concept_centroids[concept_name] = None
+
+ results = []
+
+ for target_data in target_tensors:
+ target_tensor, target_label, target_row_id, target_image_filename = target_data
+
+ target_result = {
+ 'layer': layer_name,
+ 'target_row_id': target_row_id,
+ 'target_label': target_label,
+ 'target_image_filename': target_image_filename,
+ 'concept_analysis': {}
+ }
+
+ for concept_name, concept_tensor_list in concept_groups.items():
+ similarities = []
+
+ # Original per-prototype pairwise similarities
+ for concept_data in concept_tensor_list:
+ concept_tensor, concept_label, concept_row_id, concept_image_filename = concept_data
+ if target_tensor.shape != concept_tensor.shape:
+ print(f'Warning: Shape mismatch between target {target_row_id} and concept {concept_row_id}')
+ continue
+ sim = cosine_similarity_numpy(target_tensor, concept_tensor)
+ similarities.append(sim)
+
+ concept_stats = {}
+ if similarities:
+ similarities = np.array(similarities)
+ distances = 1.0 - similarities
+
+ concept_stats.update({
+ 'min_similarity': float(np.min(similarities)),
+ 'max_similarity': float(np.max(similarities)),
+ 'mean_similarity': float(np.mean(similarities)),
+ 'min_distance': float(np.min(distances)),
+ 'mean_distance': float(np.mean(distances)),
+ 'num_comparisons': int(len(similarities)),
+ })
+
+ # New: centroid-based similarity
+ centroid = concept_centroids.get(concept_name, None)
+ if centroid is not None and centroid.shape == target_tensor.shape:
+ cen_sim = cosine_similarity_numpy(target_tensor, centroid)
+ cen_ang = float(np.degrees(np.arccos(np.clip(cen_sim, -1.0, 1.0))))
+ concept_stats.update({
+ 'centroid_similarity': float(cen_sim),
+ 'centroid_angular_deg': cen_ang
+ })
+
+ if concept_stats:
+ target_result['concept_analysis'][concept_name] = concept_stats
+
+ results.append(target_result)
+
+ target_display = target_image_filename if target_image_filename else f'Target_{target_row_id}'
+ print(f'Analyzed {target_display} against {len(concept_groups)} concepts')
+
+ return results
+
+
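+# Worked example for the centroid angle (illustrative): a centroid similarity of
+# 0.5 maps to degrees(arccos(0.5)) = 60.0, while 1.0 maps to 0.0 degrees, so a
+# smaller angle means the target lies closer to that concept's centroid direction.
+
+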
+def concept_similarity_analysis(
+ target_db_path: str,
+ concept_db_path: str,
+ layer_names: Optional[list[str]] = None,
+ n_pca_components: Optional[int] = None,
+ device: str = 'cpu'
+) -> dict[str, dict[str, Any]]:
+ """Main function for concept-based similarity analysis.
+
+ Args:
+ target_db_path: Path to target images database
+ concept_db_path: Path to concept images database
+ layer_names: List of layer names to analyze (None for all common layers)
+ n_pca_components: Number of PCA components (None to skip PCA)
+ device: PyTorch device
+
+ Returns:
+ Dictionary of analysis results by layer
+ """
+ print('Starting concept-based similarity analysis...')
+ print(f'Target DB: {target_db_path}')
+ print(f'Concept DB: {concept_db_path}')
+ print(f'PCA components: {n_pca_components}')
+
+ # Load tensors from both databases
+ print(f'\nLoading tensors from {target_db_path}...')
+ target_tensors = load_tensors_by_layer(target_db_path, device)
+
+ print(f'Loading tensors from {concept_db_path}...')
+ concept_tensors = load_tensors_by_layer(concept_db_path, device)
+
+ # Find common layers
+ common_layers = set(target_tensors.keys()) & set(concept_tensors.keys())
+ print(f'\nFound {len(common_layers)} common layers: {sorted(common_layers)}')
+
+ if not common_layers:
+ print('No common layers found between databases!')
+ return {}
+
+ # Determine which layers to analyze
+ if layer_names is None:
+ layers_to_analyze = sorted(common_layers)
+ print('Analyzing all common layers')
+ else:
+ if isinstance(layer_names, str):
+ layer_names = [layer_names]
+ layers_to_analyze = [layer for layer in layer_names if layer in common_layers]
+ print(f'Analyzing specified layers: {layers_to_analyze}')
+
+ # Warn about missing layers
+ missing_layers = set(layer_names) - common_layers
+ if missing_layers:
+ print(f'Warning: Requested layers not found: {missing_layers}')
+
+ if not layers_to_analyze:
+ print('No valid layers to analyze!')
+ return {}
+
+ all_results = {}
+
+ # Process each layer
+ for layer in layers_to_analyze:
+ print(f'\n{"=" * 50}')
+ print(f'Processing Layer: {layer}')
+ print(f'{"=" * 50}')
+
+ target_layer_tensors = target_tensors[layer]
+ concept_layer_tensors = concept_tensors[layer]
+
+ print(f'Target tensors: {len(target_layer_tensors)}')
+ print(f'Concept tensors: {len(concept_layer_tensors)}')
+
+ # Apply PCA if requested
+ if n_pca_components is not None:
+ target_layer_tensors, concept_layer_tensors, pca_model = apply_pca_to_layer(
+ target_layer_tensors, concept_layer_tensors, n_pca_components
+ )
+ else:
+ pca_model = None
+
+ # Analyze similarities
+ layer_results = analyze_target_vs_concepts(
+ target_layer_tensors, concept_layer_tensors, layer
+ )
+
+ all_results[layer] = {
+ 'results': layer_results,
+ 'pca_model': pca_model,
+ 'n_pca_components': n_pca_components
+ }
+
+ # Print layer summary
+ if layer_results:
+            print(f"\nLayer '{layer}' Summary:")
+ print(f' Analyzed {len(layer_results)} target images')
+
+ # Get all concept names from first result
+ if layer_results[0]['concept_analysis']:
+ concept_names = list(layer_results[0]['concept_analysis'].keys())
+ print(f' Against {len(concept_names)} concepts: {concept_names}')
+
+ return all_results
+
+
+def save_concept_analysis_results(results: dict[str, dict[str, Any]], output_file: str = 'output/concept_similarity_analysis.txt') -> None:
+ """Save concept analysis results to a text file.
+
+ Args:
+ results: Dictionary of analysis results by layer
+ output_file: Output filename
+ """
+ os.makedirs(os.path.dirname(output_file), exist_ok=True)
+
+ with open(output_file, 'w') as f:
+ f.write('Concept-Based VLM Embedding Similarity Analysis\n')
+ f.write('=' * 60 + '\n\n')
+
+ for layer, layer_data in results.items():
+ layer_results = layer_data['results']
+ n_pca_components = layer_data['n_pca_components']
+
+ f.write(f'Layer: {layer}\n')
+ if n_pca_components:
+ f.write(f'PCA Components: {n_pca_components}\n')
+ f.write('-' * 40 + '\n\n')
+
+ for result in layer_results:
+ target_display = result['target_image_filename'] or f'Target_{result["target_row_id"]}'
+ f.write(f'Target: {target_display}\n')
+
+ for concept_name, stats in result['concept_analysis'].items():
+ f.write(f' {concept_name}:\n')
+ if 'min_similarity' in stats:
+ f.write(f' Min Similarity: {stats["min_similarity"]:.4f}\n')
+ f.write(f' Max Similarity: {stats["max_similarity"]:.4f}\n')
+ f.write(f' Mean Similarity: {stats["mean_similarity"]:.4f}\n')
+ f.write(f' Min Distance: {stats["min_distance"]:.4f}\n')
+ f.write(f' Mean Distance: {stats["mean_distance"]:.4f}\n')
+ f.write(f' Comparisons: {stats["num_comparisons"]}\n')
+ if 'centroid_similarity' in stats:
+ f.write(f' Centroid Similarity: {stats["centroid_similarity"]:.4f}\n')
+ f.write(f' Centroid Angular (deg): {stats["centroid_angular_deg"]:.2f}\n')
+ f.write('\n')
+
+ f.write('\n')
+
+ print(f'Results saved to {output_file}')
+
+
+def analyze_concept_trends(results: dict[str, dict[str, Any]]) -> None:
+ """Analyze trends across all targets and concepts.
+
+ Args:
+ results: Dictionary of analysis results by layer
+ """
+ print(f'\n{"=" * 50}')
+ print('CONCEPT ANALYSIS TRENDS')
+ print(f'{"=" * 50}')
+
+ for layer, layer_data in results.items():
+ layer_results = layer_data['results']
+ n_pca_components = layer_data['n_pca_components']
+
+ print(f'\nLayer: {layer}')
+ if n_pca_components:
+ print(f'PCA Components: {n_pca_components}')
+ print('-' * 30)
+
+ if not layer_results:
+ print('No results for this layer')
+ continue
+
+ concept_stats = defaultdict(list)
+ for result in layer_results:
+ for concept_name, stats in result['concept_analysis'].items():
+ concept_stats[concept_name].append(stats)
+
+ for concept_name in sorted(concept_stats.keys()):
+ stats_list = concept_stats[concept_name]
+ all_min_sim = [s['min_similarity'] for s in stats_list if 'min_similarity' in s]
+ all_max_sim = [s['max_similarity'] for s in stats_list if 'max_similarity' in s]
+ all_mean_sim = [s['mean_similarity'] for s in stats_list if 'mean_similarity' in s]
+ all_min_dist = [s['min_distance'] for s in stats_list if 'min_distance' in s]
+ all_cen_sim = [s['centroid_similarity'] for s in stats_list if 'centroid_similarity' in s]
+
+ print(f' {concept_name}:')
+ if all_min_sim:
+ print(f' Avg Min Similarity: {np.mean(all_min_sim):.4f}')
+ if all_max_sim:
+ print(f' Avg Max Similarity: {np.mean(all_max_sim):.4f}')
+ if all_mean_sim:
+ print(f' Avg Mean Similarity: {np.mean(all_mean_sim):.4f}')
+ if all_min_dist:
+ print(f' Avg Min Distance: {np.mean(all_min_dist):.4f}')
+ if all_cen_sim:
+ print(f' Avg Centroid Cosine: {np.mean(all_cen_sim):.4f}')
+ print(f' Targets analyzed: {len(stats_list)}')
+
+
+def plot_pca_sensitivity_analysis(
+ target_db_path: str,
+ concept_db_path: str,
+ layer_names: Optional[list[str]] = None,
+ max_components: int = 50,
+ device: str = 'cpu',
+ output_dir: str = 'output',
+ raw_data_dir: Optional[str] = None
+) -> None:
+ """Plot centroid similarity vs number of PCA components for interpretability analysis.
+
+ Args:
+ target_db_path: Path to target images database
+ concept_db_path: Path to concept images database
+ layer_names: List of layer names to analyze (None for all common layers)
+ max_components: Maximum number of PCA components to test
+ device: PyTorch device
+ output_dir: Directory to save plots
+        raw_data_dir: Directory to save raw JSON data; if set, plotting is skipped
+ """
+ print(f'\n{"=" * 50}')
+ print('PCA SENSITIVITY ANALYSIS')
+ print(f'{"=" * 50}')
+
+ # Load tensors from both databases
+ print(f'Loading tensors from {target_db_path}...')
+ target_tensors = load_tensors_by_layer(target_db_path, device)
+
+ print(f'Loading tensors from {concept_db_path}...')
+ concept_tensors = load_tensors_by_layer(concept_db_path, device)
+
+ # Find common layers
+ common_layers = set(target_tensors.keys()) & set(concept_tensors.keys())
+
+ # Determine which layers to analyze
+ if layer_names is None:
+ layers_to_analyze = sorted(common_layers)
+ else:
+ if isinstance(layer_names, str):
+ layer_names = [layer_names]
+ layers_to_analyze = [layer for layer in layer_names if layer in common_layers]
+
+ os.makedirs(output_dir, exist_ok=True)
+
+ # Process each layer
+ for layer in layers_to_analyze:
+ print(f'\nProcessing layer: {layer}')
+
+ target_layer_tensors = target_tensors[layer]
+ concept_layer_tensors = concept_tensors[layer]
+
+ if not target_layer_tensors or not concept_layer_tensors:
+ print(f'Skipping layer {layer} - insufficient data')
+ continue
+
+ # Determine actual max components based on data
+ concept_arrays = [data[0] for data in concept_layer_tensors]
+ if not concept_arrays:
+ continue
+
+ n_features = concept_arrays[0].shape[0]
+ n_samples = len(concept_arrays)
+ actual_max_components = min(max_components, n_features, n_samples)
+
+ print(f' Features: {n_features}, Samples: {n_samples}')
+ print(f' Testing PCA components: 1 to {actual_max_components}')
+
+ # Component range to test
+ component_range = range(1, actual_max_components + 1)
+
+ # Store results for each target image
+ target_results: dict[str, dict[str, Any]] = {}
+
+ # Test each number of components
+ for n_comp in component_range:
+ print(f' Testing {n_comp} components...', end='', flush=True)
+
+ # Apply PCA with n_comp components
+ transformed_targets, transformed_concepts, _ = apply_pca_to_layer(
+ target_layer_tensors, concept_layer_tensors, n_comp
+ )
+
+ # Analyze similarities
+ layer_results = analyze_target_vs_concepts(
+ transformed_targets, transformed_concepts, layer
+ )
+
+ # Store results for each target
+ for result in layer_results:
+ target_id = result['target_row_id']
+ target_name = result['target_image_filename'] or f'Target_{target_id}'
+
+ if target_name not in target_results:
+ target_results[target_name] = {
+ 'n_components': [],
+ 'concept_similarities': defaultdict(list)
+ }
+
+ target_results[target_name]['n_components'].append(n_comp)
+
+ # Store centroid similarities for each concept
+ for concept_name, stats in result['concept_analysis'].items():
+ if 'centroid_similarity' in stats:
+ similarity = stats['centroid_similarity']
+ target_results[target_name]['concept_similarities'][concept_name].append(similarity)
+
+ print(' done')
+
+        if raw_data_dir is not None:
+            import json
+
+            os.makedirs(raw_data_dir, exist_ok=True)
+            raw_data_path = f'{raw_data_dir}/raw_{layer.replace("/", "_")}.json'
+            with open(raw_data_path, 'w') as fp:
+                json.dump(target_results, fp)
+            print(f'    Raw data saved: {raw_data_path} (plotting skipped)')
+            continue
+
+ # Create plots for this layer
+ if target_results:
+ # Get all concepts from the first target
+ first_target = next(iter(target_results.values()))
+ all_concepts = list(first_target['concept_similarities'].keys())
+
+ # Create subplots - one for each concept
+ n_concepts = len(all_concepts)
+ n_cols = min(3, n_concepts)
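+            # Ceiling division: enough rows to hold one subplot per concept.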
+ n_rows = (n_concepts + n_cols - 1) // n_cols
+
+ fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows))
+ fig.suptitle(f'Centroid Similarity vs PCA Components - Layer: {layer}', fontsize=16)
+
+ if n_concepts == 1:
+ axes = [axes]
+ elif n_rows == 1:
+ axes = axes if n_concepts > 1 else [axes]
+ else:
+ axes = axes.flatten()
+
+ # Plot each concept
+ for concept_idx, concept_name in enumerate(all_concepts):
+ ax = axes[concept_idx] if concept_idx < len(axes) else None
+ if ax is None:
+ continue
+
+ # Plot lines for each target image
+ for target_name, target_data in target_results.items():
+ n_components = target_data['n_components']
+ similarities = target_data['concept_similarities'][concept_name]
+
+ if len(similarities) == len(n_components):
+ # Clean target name for legend
+ clean_target_name = os.path.splitext(os.path.basename(target_name))[0]
+ ax.plot(n_components, similarities,
+ marker='o', markersize=3, linewidth=1.5,
+ label=clean_target_name, alpha=0.8)
+
+ ax.set_xlabel('Number of PCA Components')
+ ax.set_ylabel('Centroid Similarity')
+ ax.set_title(f'Concept: {concept_name}')
+ ax.grid(True, alpha=0.3)
+ ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)
+
+ # Hide unused subplots
+ for idx in range(n_concepts, len(axes)):
+ axes[idx].set_visible(False)
+
+ plt.tight_layout()
+
+ # Save plot
+ plot_filename = f'{output_dir}/pca_sensitivity_layer_{layer.replace("/", "_")}.png'
+ plt.savefig(plot_filename, dpi=150, bbox_inches='tight')
+ plt.close()
+
+ print(f' Plot saved: {plot_filename}')
+
+ else:
+ print(f' No results to plot for layer {layer}')
+
+ print(f'\nPCA sensitivity analysis complete. Plots saved in {output_dir}/')
+
+
+if __name__ == '__main__':
+ # Configuration
+ target_db_path = 'output/llava.db'
+ concept_db_path = 'output/llava-concepts-colors.db'
+
+ # Analysis parameters
+ layer_names = None # None for all layers, or specify: ['layer_name1', 'layer_name2']
+ n_pca_components = None # None for raw embeddings, or specify: 5, 10, etc. (production: use None)
+
+ print('=' * 60)
+ print('CONCEPT-BASED VLM EMBEDDING ANALYSIS')
+ print('=' * 60)
+
+ try:
+ # Run main analysis
+ results = concept_similarity_analysis(
+ target_db_path=target_db_path,
+ concept_db_path=concept_db_path,
+ layer_names=layer_names,
+ n_pca_components=n_pca_components,
+ device='cpu'
+ )
+
+ if results:
+ # Save detailed results
+ output_file = 'output/concept_similarity_analysis.txt'
+ save_concept_analysis_results(results, output_file)
+
+ # Show aggregate trends
+ analyze_concept_trends(results)
+
+ print(f'\n{"=" * 50}')
+ print('ANALYSIS COMPLETE')
+ print(f'{"=" * 50}')
+ print(f'Processed {len(results)} layers')
+ print(f'Results saved to: {output_file}')
+
+ else:
+ print('No results generated. Check database compatibility and parameters.')
+
+ # Run PCA sensitivity analysis (separate from main analysis)
+ print(f'\n{"=" * 60}')
+ print('STARTING PCA SENSITIVITY ANALYSIS')
+ print(f'{"=" * 60}')
+
+ plot_pca_sensitivity_analysis(
+ target_db_path=target_db_path,
+ concept_db_path=concept_db_path,
+ layer_names=layer_names, # Same layers as main analysis
+ max_components=50, # Adjust based on your data size
+ device='cpu',
+ output_dir='output'
+ )
+
+ except Exception as e:
+ print(f'Error during analysis: {e}')
+ import traceback
+ traceback.print_exc()
diff --git a/src/concepts/pca_knn.py b/src/concepts/pca_knn.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f48c6c956f01be5b0d63c6eef5673ce8c1a72f1
--- /dev/null
+++ b/src/concepts/pca_knn.py
@@ -0,0 +1,538 @@
+"""Instance-based k-NN extension for VLM concept analysis.
+
+This module extends the existing VLM concept analysis with nearest-neighbor
+prototype-based classification. It reuses the existing functions and adds
+instance-based readout capabilities.
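+
+Example usage (a minimal sketch mirroring the __main__ block below; the
+database paths are the defaults used there):
+
+    results = concept_similarity_analysis_with_knn(
+        target_db_path='output/llava.db',
+        concept_db_path='output/llava-concepts-colors.db',
+        n_pca_components=5,
+        knn_topk=5,
+    )
+    save_knn_analysis_results(results, 'output/knn_similarity_analysis.txt')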
+"""
+
+from __future__ import annotations
+
+from collections import defaultdict
+from typing import Any, Callable, Optional
+
+import numpy as np
+# Import from the existing analysis module
+from pca import (analyze_concept_trends, cosine_similarity_numpy,
+ extract_concept_from_filename, group_tensors_by_concept,
+ load_tensors_by_layer)
+
+
+def _build_normalized_prototype_bank(
+ concept_tensors: list[tuple[np.ndarray, Any, int, str]]
+) -> tuple[Optional[np.ndarray], list[dict[str, Any]]]:
+ """Build an (N,d) bank of L2-normalized prototype vectors and metadata.
+
+ Args:
+ concept_tensors: List of tuples (vec, label, row_id, image_path)
+
+ Returns:
+ Tuple of (X matrix (N,d), meta list of dicts with concept/row_id/image_path)
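+
+    Because every row of X is L2-normalized, X @ t for a unit-norm target t
+    yields cosine similarities directly.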
+ """
+ X_list, meta = [], []
+ for vec, label, row_id, image_path in concept_tensors:
+ if vec is None:
+ continue
+ norm = np.linalg.norm(vec)
+ if not np.isfinite(norm) or norm == 0:
+ continue
+ X_list.append(vec / norm)
+ meta.append({
+ 'concept': extract_concept_from_filename(image_path),
+ 'row_id': row_id,
+ 'image_path': image_path,
+ 'label': label
+ })
+ if not X_list:
+ return None, []
+ X = np.vstack(X_list)
+ return X, meta
+
+
+def _nearest_prototypes(
+ target_vec: np.ndarray,
+ X_bank: Optional[np.ndarray],
+ meta: list[dict[str, Any]],
+ topk: int = 5
+) -> list[dict[str, Any]]:
+ """Compute cosine similarities target vs all prototypes (already normalized).
+
+ Args:
+ target_vec: Target vector (d,), will be L2-normalized here
+ X_bank: Prototype bank matrix (N, d), already normalized
+ meta: List of metadata dicts for each prototype
+ topk: Number of top neighbors to return
+
+ Returns:
+ Top list of dicts sorted by similarity with keys:
+ ['concept', 'row_id', 'image_path', 'label', 'sim']
+ """
+ if X_bank is None or len(meta) == 0:
+ return []
+
+ # L2-normalize target
+ t = target_vec
+ t_norm = np.linalg.norm(t)
+ if not np.isfinite(t_norm) or t_norm == 0:
+ return []
+
+ t = t / t_norm
+ sims = X_bank @ t # cosine since both normalized
+
+ k = min(topk, sims.shape[0])
+ # argpartition is O(N); then sort the small top-k slice
+ idx = np.argpartition(-sims, k - 1)[:k]
+ idx = idx[np.argsort(-sims[idx])]
+
+ out = []
+ for i in idx:
+ m = meta[i]
+ out.append({
+ 'concept': m['concept'],
+ 'row_id': m['row_id'],
+ 'image_path': m['image_path'],
+ 'label': m['label'],
+ 'sim': float(sims[i]),
+ })
+ return out
+
+
+def _knn_weighted_vote(
+ neighbors: list[dict[str, Any]],
+ p: float = 1.0
+) -> tuple[Optional[str], dict[str, float]]:
+ """Weighted majority vote over top-k neighbors.
+
+ Args:
+ neighbors: List of neighbor dicts with 'concept' and 'sim' keys
+ p: Power for weighting (weight = sim^p, negatives clipped to 0)
+
+ Returns:
+ Tuple of (winner_concept, score_dict)
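+
+    Example:
+        >>> _knn_weighted_vote([{'concept': 'red', 'sim': 0.9},
+        ...                     {'concept': 'blue', 'sim': 0.4}], p=1.0)
+        ('red', {'red': 0.9, 'blue': 0.4})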
+ """
+ wsum = defaultdict(float)
+ for nb in neighbors:
+ w = max(0.0, nb['sim']) ** p
+ wsum[nb['concept']] += w
+ if not wsum:
+ return None, {}
+ winner = max(wsum.items(), key=lambda kv: kv[1])[0]
+ return winner, dict(wsum)
+
+
+def analyze_target_vs_concepts_with_knn(
+ target_tensors: list[tuple[np.ndarray, Any, int, str]],
+ concept_tensors: list[tuple[np.ndarray, Any, int, str]],
+ layer_name: str,
+ knn_topk: int = 5,
+ knn_power: float = 1.0
+) -> list[dict[str, Any]]:
+ """Analyze similarity between targets and concepts with k-NN instance-based prediction.
+
+ Keeps existing per-prototype stats and centroid metrics.
+ Adds instance-based nearest-neighbor prediction (1-NN + k-NN vote).
+
+ Args:
+ target_tensors: List of target tensor data
+ concept_tensors: List of concept tensor data
+ layer_name: Name of the current layer
+ knn_topk: Number of nearest neighbors to consider
+ knn_power: Power for weighted voting (weight = sim^p)
+
+ Returns:
+ List of analysis results with added 'instance_knn' section
+ """
+ # Group by concept (existing behavior)
+ concept_groups = group_tensors_by_concept(concept_tensors)
+ print(f'Found {len(concept_groups)} concepts: {list(concept_groups.keys())}')
+ for concept, tensors in concept_groups.items():
+ print(f' {concept}: {len(tensors)} images')
+
+ # Precompute centroids (as before)
+ concept_centroids = {}
+ for concept_name, tensor_list in concept_groups.items():
+ vecs = [t[0] for t in tensor_list]
+ if len(vecs) > 0:
+ concept_centroids[concept_name] = np.mean(np.vstack(vecs), axis=0)
+ else:
+ concept_centroids[concept_name] = None
+
+ # NEW: build prototype bank once for this layer
+ X_bank, bank_meta = _build_normalized_prototype_bank(concept_tensors)
+ if X_bank is None:
+ print('Warning: prototype bank is empty for this layer; skipping instance-NN.')
+
+ results = []
+
+ for target_data in target_tensors:
+ target_vec, target_label, target_row_id, target_image_filename = target_data
+
+ target_result = {
+ 'layer': layer_name,
+ 'target_row_id': target_row_id,
+ 'target_label': target_label,
+ 'target_image_filename': target_image_filename,
+ 'concept_analysis': {}, # existing per-concept stats live here
+ 'instance_knn': {} # NEW: instance-based readout lives here
+ }
+
+ # --- Existing per-concept stats (unchanged) ---
+ for concept_name, concept_tensor_list in concept_groups.items():
+ similarities = []
+ for concept_data in concept_tensor_list:
+ concept_vec, concept_label, concept_row_id, concept_image_filename = concept_data
+ if target_vec.shape != concept_vec.shape:
+ print(f'Warning: Shape mismatch between target {target_row_id} and concept {concept_row_id}')
+ continue
+ sim = cosine_similarity_numpy(target_vec, concept_vec)
+ similarities.append(sim)
+
+ concept_stats = {}
+ if similarities:
+ similarities = np.array(similarities)
+ distances = 1.0 - similarities
+ concept_stats.update({
+ 'min_similarity': float(np.min(similarities)),
+ 'max_similarity': float(np.max(similarities)),
+ 'mean_similarity': float(np.mean(similarities)),
+ 'min_distance': float(np.min(distances)),
+ 'mean_distance': float(np.mean(distances)),
+ 'num_comparisons': int(len(similarities)),
+ })
+
+ centroid = concept_centroids.get(concept_name, None)
+ if centroid is not None and centroid.shape == target_vec.shape:
+ cen_sim = cosine_similarity_numpy(target_vec, centroid)
+ cen_ang = float(np.degrees(np.arccos(np.clip(cen_sim, -1.0, 1.0))))
+ concept_stats.update({
+ 'centroid_similarity': float(cen_sim),
+ 'centroid_angular_deg': cen_ang
+ })
+
+ if concept_stats:
+ target_result['concept_analysis'][concept_name] = concept_stats
+
+ # --- NEW: instance-based nearest neighbor prediction ---
+ if X_bank is not None:
+ nbs = _nearest_prototypes(target_vec, X_bank, bank_meta, topk=knn_topk)
+ winner_1nn = nbs[0]['concept'] if nbs else None
+ voted, vote_scores = _knn_weighted_vote(nbs, p=knn_power) if nbs else (None, {})
+
+ target_result['instance_knn'] = {
+ 'top1_concept': winner_1nn,
+ 'top1_similarity': nbs[0]['sim'] if nbs else None,
+ 'topk_neighbors': nbs, # list with concept,row_id,image_path,sim
+ 'topk_voted_concept': voted, # weighted by sim^p over topk (non-negative)
+ 'vote_scores': vote_scores, # dict concept->weight
+ 'topk': knn_topk,
+ 'vote_power': knn_power
+ }
+
+ results.append(target_result)
+
+ target_display = target_image_filename if target_image_filename else f'Target_{target_row_id}'
+ print(f'Analyzed {target_display} against {len(concept_groups)} concepts')
+
+ return results
+
+
+def concept_similarity_analysis_with_knn(
+ target_db_path: str,
+ concept_db_path: str,
+ layer_names: Optional[list[str]] = None,
+ n_pca_components: Optional[int] = None,
+ knn_topk: int = 5,
+ knn_power: float = 1.0,
+ device: str = 'cpu'
+) -> dict[str, dict[str, Any]]:
+ """Main function for concept-based similarity analysis with k-NN prediction.
+
+ Args:
+ target_db_path: Path to target images database
+ concept_db_path: Path to concept images database
+ layer_names: List of layer names to analyze (None for all common layers)
+ n_pca_components: Number of PCA components (None to skip PCA)
+ knn_topk: Number of nearest neighbors for k-NN prediction
+ knn_power: Power for weighted voting in k-NN
+ device: PyTorch device
+
+ Returns:
+ Dictionary of analysis results by layer with k-NN predictions
+ """
+ print('Starting concept-based similarity analysis with k-NN...')
+ print(f'Target DB: {target_db_path}')
+ print(f'Concept DB: {concept_db_path}')
+ print(f'PCA components: {n_pca_components}')
+ print(f'k-NN parameters: topk={knn_topk}, power={knn_power}')
+
+ # Load tensors from both databases (reuse existing function)
+ print(f'\nLoading tensors from {target_db_path}...')
+ target_tensors = load_tensors_by_layer(target_db_path, device)
+
+ print(f'Loading tensors from {concept_db_path}...')
+ concept_tensors = load_tensors_by_layer(concept_db_path, device)
+
+ # Find common layers
+ common_layers = set(target_tensors.keys()) & set(concept_tensors.keys())
+ print(f'\nFound {len(common_layers)} common layers: {sorted(common_layers)}')
+
+ if not common_layers:
+ print('No common layers found between databases!')
+ return {}
+
+ # Determine which layers to analyze
+ if layer_names is None:
+ layers_to_analyze = sorted(common_layers)
+ print('Analyzing all common layers')
+ else:
+ if isinstance(layer_names, str):
+ layer_names = [layer_names]
+ layers_to_analyze = [layer for layer in layer_names if layer in common_layers]
+ print(f'Analyzing specified layers: {layers_to_analyze}')
+
+ # Warn about missing layers
+ missing_layers = set(layer_names) - common_layers
+ if missing_layers:
+ print(f'Warning: Requested layers not found: {missing_layers}')
+
+ if not layers_to_analyze:
+ print('No valid layers to analyze!')
+ return {}
+
+ all_results = {}
+
+ # Process each layer
+ for layer in layers_to_analyze:
+ print(f'\n{"=" * 50}')
+ print(f'Processing Layer: {layer}')
+ print(f'{"=" * 50}')
+
+ target_layer_tensors = target_tensors[layer]
+ concept_layer_tensors = concept_tensors[layer]
+
+ print(f'Target tensors: {len(target_layer_tensors)}')
+ print(f'Concept tensors: {len(concept_layer_tensors)}')
+
+ # Apply PCA if requested (reuse existing function)
+ if n_pca_components is not None:
+ # Import the PCA function
+ from pca import apply_pca_to_layer
+ target_layer_tensors, concept_layer_tensors, pca_model = apply_pca_to_layer(
+ target_layer_tensors, concept_layer_tensors, n_pca_components
+ )
+ else:
+ pca_model = None
+
+ # Analyze similarities with k-NN
+ layer_results = analyze_target_vs_concepts_with_knn(
+ target_layer_tensors, concept_layer_tensors, layer,
+ knn_topk=knn_topk, knn_power=knn_power
+ )
+
+ all_results[layer] = {
+ 'results': layer_results,
+ 'pca_model': pca_model,
+ 'n_pca_components': n_pca_components,
+ 'knn_topk': knn_topk,
+ 'knn_power': knn_power
+ }
+
+ # Print layer summary
+ if layer_results:
+            print(f"\nLayer '{layer}' Summary:")
+ print(f' Analyzed {len(layer_results)} target images')
+
+ # Get all concept names from first result
+ if layer_results[0]['concept_analysis']:
+ concept_names = list(layer_results[0]['concept_analysis'].keys())
+ print(f' Against {len(concept_names)} concepts: {concept_names}')
+
+ # Print k-NN summary
+ knn_predictions = []
+ for result in layer_results:
+ ik = result.get('instance_knn', {})
+ if ik.get('top1_concept'):
+ knn_predictions.append(ik['top1_concept'])
+
+ if knn_predictions:
+ from collections import Counter
+ pred_counts = Counter(knn_predictions)
+ print(f' k-NN Predictions: {dict(pred_counts)}')
+
+ return all_results
+
+
+def save_knn_analysis_results(
+ results: dict[str, dict[str, Any]],
+ output_file: str = 'output/knn_similarity_analysis.txt'
+) -> None:
+ """Save k-NN analysis results to a text file.
+
+ Args:
+ results: Dictionary of analysis results by layer
+ output_file: Output filename
+ """
+ import os
+ os.makedirs(os.path.dirname(output_file), exist_ok=True)
+
+ with open(output_file, 'w') as f:
+ f.write('VLM Concept Analysis with Instance-based k-NN Prediction\n')
+ f.write('=' * 60 + '\n\n')
+
+ for layer, layer_data in results.items():
+ layer_results = layer_data['results']
+ n_pca_components = layer_data['n_pca_components']
+ knn_topk = layer_data.get('knn_topk', 5)
+ knn_power = layer_data.get('knn_power', 1.0)
+
+ f.write(f'Layer: {layer}\n')
+ if n_pca_components:
+ f.write(f'PCA Components: {n_pca_components}\n')
+ f.write(f'k-NN Parameters: topk={knn_topk}, power={knn_power}\n')
+ f.write('-' * 40 + '\n\n')
+
+ for result in layer_results:
+ target_display = result['target_image_filename'] or f'Target_{result["target_row_id"]}'
+ f.write(f'Target: {target_display}\n')
+
+ # k-NN predictions
+ ik = result.get('instance_knn', {})
+ if ik:
+ f.write(f' 1-NN Concept: {ik.get("top1_concept")} (sim={ik.get("top1_similarity", 0):.4f})\n')
+ if ik.get('topk_voted_concept') is not None and ik.get('topk', 1) > 1:
+ f.write(f' k-NN Vote (k={ik["topk"]}, p={ik["vote_power"]}): {ik["topk_voted_concept"]}\n')
+
+ # Show top neighbors
+ neighbors = ik.get('topk_neighbors', [])
+ if neighbors:
+ f.write(' Top Neighbors:\n')
+ for i, nb in enumerate(neighbors[:3], 1): # Show top 3
+ f.write(f' {i}. {nb["concept"]} (sim={nb["sim"]:.4f})\n')
+
+ # Original concept analysis
+ for concept_name, stats in result['concept_analysis'].items():
+ f.write(f' Concept {concept_name}:\n')
+ if 'centroid_similarity' in stats:
+ f.write(f' Centroid Similarity: {stats["centroid_similarity"]:.4f}\n')
+ if 'mean_similarity' in stats:
+ f.write(f' Mean Similarity: {stats["mean_similarity"]:.4f}\n')
+ f.write('\n')
+
+ f.write('\n')
+
+ print(f'k-NN results saved to {output_file}')
+
+
+def analyze_knn_accuracy(
+ results: dict[str, dict[str, Any]],
+    ground_truth_concept_extractor: Optional[Callable[[str], Optional[str]]] = None
+) -> None:
+ """Analyze k-NN prediction accuracy if ground truth is available.
+
+ Args:
+ results: Dictionary of analysis results by layer
+ ground_truth_concept_extractor: Function to extract true concept from target filename
+ """
+ if ground_truth_concept_extractor is None:
+ ground_truth_concept_extractor = extract_concept_from_filename
+
+ print(f'\n{"=" * 50}')
+ print('k-NN PREDICTION ACCURACY ANALYSIS')
+ print(f'{"=" * 50}')
+
+ for layer, layer_data in results.items():
+ layer_results = layer_data['results']
+ knn_topk = layer_data.get('knn_topk', 5)
+
+ print(f'\nLayer: {layer}')
+ print('-' * 30)
+
+ if not layer_results:
+ print('No results for this layer')
+ continue
+
+ correct_1nn = 0
+ correct_knn = 0
+ total = 0
+
+ for result in layer_results:
+ # Extract ground truth
+ true_concept = ground_truth_concept_extractor(result['target_image_filename'])
+ if not true_concept:
+ continue
+
+ ik = result.get('instance_knn', {})
+ if not ik:
+ continue
+
+ total += 1
+
+ # Check 1-NN accuracy
+ pred_1nn = ik.get('top1_concept')
+ if pred_1nn == true_concept:
+ correct_1nn += 1
+
+ # Check k-NN vote accuracy
+ pred_knn = ik.get('topk_voted_concept')
+ if pred_knn == true_concept:
+ correct_knn += 1
+
+ if total > 0:
+ acc_1nn = correct_1nn / total
+ acc_knn = correct_knn / total
+ print(f' 1-NN Accuracy: {correct_1nn}/{total} = {acc_1nn:.3f}')
+ print(f' k-NN Accuracy (k={knn_topk}): {correct_knn}/{total} = {acc_knn:.3f}')
+ else:
+ print(' No valid predictions to evaluate')
+
+
+if __name__ == '__main__':
+ # Configuration
+ target_db_path = 'output/llava.db'
+ concept_db_path = 'output/llava-concepts-colors.db'
+
+ # Analysis parameters
+ layer_names = None # None for all layers
+ n_pca_components = 5 # None for raw embeddings
+ knn_topk = 5
+ knn_power = 1.0
+
+ print('=' * 60)
+ print('VLM CONCEPT ANALYSIS WITH INSTANCE-BASED k-NN')
+ print('=' * 60)
+
+ try:
+ # Run k-NN analysis
+ results = concept_similarity_analysis_with_knn(
+ target_db_path=target_db_path,
+ concept_db_path=concept_db_path,
+ layer_names=layer_names,
+ n_pca_components=n_pca_components,
+ knn_topk=knn_topk,
+ knn_power=knn_power,
+ device='cpu'
+ )
+
+ if results:
+ # Save detailed results
+ output_file = 'output/knn_similarity_analysis.txt'
+ save_knn_analysis_results(results, output_file)
+
+ # Analyze k-NN accuracy
+ analyze_knn_accuracy(results)
+
+ # Show aggregate trends (reuse existing function)
+ analyze_concept_trends(results)
+
+ print(f'\n{"=" * 50}')
+ print('k-NN ANALYSIS COMPLETE')
+ print(f'{"=" * 50}')
+ print(f'Processed {len(results)} layers')
+ print(f'Results saved to: {output_file}')
+
+ else:
+ print('No results generated. Check database compatibility and parameters.')
+
+ except Exception as e:
+ print(f'Error during analysis: {e}')
+ import traceback
+ traceback.print_exc()
diff --git a/src/concepts/pca_separation.py b/src/concepts/pca_separation.py
new file mode 100644
index 0000000000000000000000000000000000000000..5028be39e74bd1299dec4fed9d422f8e50940d87
--- /dev/null
+++ b/src/concepts/pca_separation.py
@@ -0,0 +1,353 @@
+"""PCA scatter plot visualization for VLM concept analysis.
+
+Creates 2D scatter plots of concepts and targets in PCA space for interpretability.
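+
+Example usage (a minimal sketch mirroring the __main__ block below; the
+database paths are the defaults used there):
+
+    create_pca_scatter_plots(
+        target_db_path='output/llava.db',
+        concept_db_path='output/llava-concepts-colors.db',
+        output_dir='output',
+    )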
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Optional
+
+import matplotlib.pyplot as plt
+import numpy as np
+from pca import (apply_pca_to_layer, extract_concept_from_filename,
+ group_tensors_by_concept, load_tensors_by_layer)
+
+
+def create_pca_scatter_plots(
+ target_db_path: str,
+ concept_db_path: str,
+ layer_names: Optional[list[str]] = None,
+ output_dir: str = 'output',
+ figsize: tuple[int, int] = (12, 8),
+ alpha: float = 0.7,
+ target_marker_size: int = 100,
+ concept_marker_size: int = 50
+) -> None:
+ """Create 2D PCA scatter plots for concepts and targets.
+
+ Args:
+ target_db_path: Path to target images database
+ concept_db_path: Path to concept images database
+ layer_names: List of layer names to visualize (None for all layers)
+ output_dir: Directory to save plots
+ figsize: Figure size (width, height)
+ alpha: Transparency for concept points
+ target_marker_size: Size of target markers
+ concept_marker_size: Size of concept markers
+ """
+ print('Creating PCA scatter plots...')
+
+ # Load tensors from both databases
+ print(f'Loading tensors from {target_db_path}...')
+ target_tensors = load_tensors_by_layer(target_db_path, 'cpu')
+
+ print(f'Loading tensors from {concept_db_path}...')
+ concept_tensors = load_tensors_by_layer(concept_db_path, 'cpu')
+
+ # Find common layers
+ common_layers = set(target_tensors.keys()) & set(concept_tensors.keys())
+ print(f'Found {len(common_layers)} common layers: {sorted(common_layers)}')
+
+ if not common_layers:
+ print('No common layers found between databases!')
+ return
+
+ # Determine which layers to visualize
+ if layer_names is None:
+ layers_to_analyze = sorted(common_layers)
+ else:
+ if isinstance(layer_names, str):
+ layer_names = [layer_names]
+ layers_to_analyze = [layer for layer in layer_names if layer in common_layers]
+
+ os.makedirs(output_dir, exist_ok=True)
+
+ # Create plots for each layer
+ for layer in layers_to_analyze:
+ print(f'\nProcessing layer: {layer}')
+
+ target_layer_tensors = target_tensors[layer]
+ concept_layer_tensors = concept_tensors[layer]
+
+ if not target_layer_tensors or not concept_layer_tensors:
+ print(f'Skipping layer {layer} - insufficient data')
+ continue
+
+ # Apply PCA with 2 components
+ print(' Applying PCA with 2 components...')
+ transformed_targets, transformed_concepts, pca_model = apply_pca_to_layer(
+ target_layer_tensors, concept_layer_tensors, n_components=2
+ )
+
+ if pca_model is None:
+ print(f' Failed to apply PCA for layer {layer}')
+ continue
+
+ # Group concepts for coloring
+ concept_groups = group_tensors_by_concept(transformed_concepts)
+
+ # Create the plot
+ fig, ax = plt.subplots(figsize=figsize)
+
+ # Define colors for concepts (use a colormap)
+ concept_names = sorted(concept_groups.keys())
+ colors = plt.cm.Set3(np.linspace(0, 1, len(concept_names)))
+ color_map = dict(zip(concept_names, colors))
+
+ # Plot concept prototypes
+ for concept_name, concept_data in concept_groups.items():
+ concept_coords = np.array([data[0] for data in concept_data])
+
+ ax.scatter(
+ concept_coords[:, 0],
+ concept_coords[:, 1],
+ c=[color_map[concept_name]],
+ s=concept_marker_size,
+ alpha=alpha,
+ label=f'{concept_name} (prototypes)',
+ marker='o',
+ edgecolors='white',
+ linewidth=0.5
+ )
+
+ # Plot targets
+ target_coords = np.array([data[0] for data in transformed_targets])
+ target_concepts = []
+
+ # Extract target concepts for coloring
+ for data in transformed_targets:
+ target_concept = extract_concept_from_filename(data[3]) # data[3] is image_filename
+ target_concepts.append(target_concept)
+
+ # Plot targets with concept-based coloring
+ for i, (coord, target_concept) in enumerate(zip(target_coords, target_concepts)):
+ if target_concept in color_map:
+ color = color_map[target_concept]
+ label = f'{target_concept} (target)' if i == 0 or target_concept != target_concepts[i-1] else None
+ else:
+ color = 'black'
+ label = 'Unknown (target)' if i == 0 else None
+
+ ax.scatter(
+ coord[0],
+ coord[1],
+ c=[color],
+ s=target_marker_size,
+ alpha=0.9,
+ marker='^', # Triangle for targets
+ edgecolors='black',
+ linewidth=1.0,
+ label=label
+ )
+
+ # Customize the plot
+ ax.set_xlabel(f'PC1 ({pca_model.explained_variance_ratio_[0]:.3f} variance explained)')
+ ax.set_ylabel(f'PC2 ({pca_model.explained_variance_ratio_[1]:.3f} variance explained)')
+ ax.set_title(f'PCA Visualization: Concepts vs Targets\nLayer: {layer}')
+ ax.grid(True, alpha=0.3)
+
+ # Create legend with better organization
+ handles, labels = ax.get_legend_handles_labels()
+
+ # Separate prototype and target entries
+ prototype_handles, prototype_labels = [], []
+ target_handles, target_labels = [], []
+
+ for handle, label in zip(handles, labels):
+ if '(prototypes)' in label:
+ prototype_handles.append(handle)
+ prototype_labels.append(label.replace(' (prototypes)', ''))
+ elif '(target)' in label:
+ target_handles.append(handle)
+ target_labels.append(label.replace(' (target)', ''))
+
+ # Create two-column legend
+ if prototype_handles and target_handles:
+ legend1 = ax.legend(
+ prototype_handles,
+                [f'{label} (○)' for label in prototype_labels],
+ title='Concept Prototypes',
+ loc='upper left',
+ bbox_to_anchor=(1.02, 1.0),
+ fontsize=9
+ )
+ ax.add_artist(legend1)
+
+ ax.legend(
+ target_handles,
+                [f'{label} (△)' for label in target_labels],
+ title='Target Images',
+ loc='upper left',
+ bbox_to_anchor=(1.02, 0.6),
+ fontsize=9
+ )
+ else:
+ ax.legend(bbox_to_anchor=(1.02, 1.0), loc='upper left', fontsize=9)
+
+ # Add statistics text
+ stats_text = (
+ f'Total variance explained: {pca_model.explained_variance_ratio_.sum():.3f}\n'
+ f'Concepts: {len(concept_groups)}\n'
+ f'Prototypes: {len(transformed_concepts)}\n'
+ f'Targets: {len(transformed_targets)}'
+ )
+
+ ax.text(
+ 0.02, 0.98,
+ stats_text,
+ transform=ax.transAxes,
+ verticalalignment='top',
+ bbox=dict(boxstyle='round', facecolor='white', alpha=0.8),
+ fontsize=9
+ )
+
+ plt.tight_layout()
+
+ # Save plot
+ plot_filename = f'{output_dir}/pca_scatter_layer_{layer.replace("/", "_")}.png'
+ plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
+ plt.close()
+
+ print(f' Plot saved: {plot_filename}')
+
+ # Print summary statistics
+ print(f' Variance explained: PC1={pca_model.explained_variance_ratio_[0]:.3f}, '
+ f'PC2={pca_model.explained_variance_ratio_[1]:.3f}, '
+ f'Total={pca_model.explained_variance_ratio_.sum():.3f}')
+ print(f' Plotted {len(concept_groups)} concept groups with {len(transformed_concepts)} prototypes')
+ print(f' Plotted {len(transformed_targets)} target images')
+
+ print(f'\nPCA scatter plots complete. Plots saved in {output_dir}/')
+
+
+def create_concept_separation_analysis(
+ target_db_path: str,
+ concept_db_path: str,
+ layer_names: Optional[list[str]] = None,
+ output_dir: str = 'output'
+) -> None:
+ """Analyze concept separation in PCA space.
+
+ Args:
+ target_db_path: Path to target images database
+ concept_db_path: Path to concept images database
+ layer_names: List of layer names to analyze (None for all layers)
+ output_dir: Directory to save analysis
+ """
+ print('\nAnalyzing concept separation in PCA space...')
+
+ # Load tensors
+ target_tensors = load_tensors_by_layer(target_db_path, 'cpu')
+ concept_tensors = load_tensors_by_layer(concept_db_path, 'cpu')
+
+ common_layers = set(target_tensors.keys()) & set(concept_tensors.keys())
+
+ if layer_names is None:
+ layers_to_analyze = sorted(common_layers)
+ else:
+ if isinstance(layer_names, str):
+ layer_names = [layer_names]
+ layers_to_analyze = [layer for layer in layer_names if layer in common_layers]
+
+ os.makedirs(output_dir, exist_ok=True)
+
+ with open(f'{output_dir}/pca_separation_analysis.txt', 'w') as f:
+ f.write('PCA Concept Separation Analysis\n')
+ f.write('=' * 40 + '\n\n')
+
+ for layer in layers_to_analyze:
+ target_layer_tensors = target_tensors[layer]
+ concept_layer_tensors = concept_tensors[layer]
+
+ if not concept_layer_tensors:
+ continue
+
+ # Apply PCA
+ _, transformed_concepts, pca_model = apply_pca_to_layer(
+ target_layer_tensors, concept_layer_tensors, n_components=2
+ )
+
+ if pca_model is None:
+ continue
+
+ f.write(f'Layer: {layer}\n')
+ f.write('-' * 20 + '\n')
+
+ # Group concepts
+ concept_groups = group_tensors_by_concept(transformed_concepts)
+
+ # Calculate concept centroids in PCA space
+ concept_centroids = {}
+ for concept_name, concept_data in concept_groups.items():
+ coords = np.array([data[0] for data in concept_data])
+ concept_centroids[concept_name] = np.mean(coords, axis=0)
+
+ # Calculate pairwise distances between concept centroids
+ concept_names = list(concept_centroids.keys())
+ f.write('Concept centroid distances in PC1-PC2 space:\n')
+
+ for i, concept1 in enumerate(concept_names):
+ for j, concept2 in enumerate(concept_names[i+1:], i+1):
+ centroid1 = concept_centroids[concept1]
+ centroid2 = concept_centroids[concept2]
+ distance = np.linalg.norm(centroid1 - centroid2)
+ f.write(f' {concept1} - {concept2}: {distance:.3f}\n')
+
+ # Calculate within-concept scatter
+ f.write('\nWithin-concept scatter (std dev):\n')
+ for concept_name, concept_data in concept_groups.items():
+ coords = np.array([data[0] for data in concept_data])
+ if len(coords) > 1:
+ std_pc1 = np.std(coords[:, 0])
+ std_pc2 = np.std(coords[:, 1])
+ f.write(f' {concept_name}: PC1={std_pc1:.3f}, PC2={std_pc2:.3f}\n')
+
+ f.write('\nPCA Statistics:\n')
+ f.write(f' PC1 variance explained: {pca_model.explained_variance_ratio_[0]:.3f}\n')
+ f.write(f' PC2 variance explained: {pca_model.explained_variance_ratio_[1]:.3f}\n')
+ f.write(f' Total variance explained: {pca_model.explained_variance_ratio_.sum():.3f}\n')
+ f.write('\n\n')
+
+ print(f'Separation analysis saved to {output_dir}/pca_separation_analysis.txt')
+
+
+if __name__ == '__main__':
+ # Configuration
+ target_db_path = 'output/llava.db'
+ concept_db_path = 'output/llava-concepts-colors.db'
+
+ # Visualization parameters
+ layer_names = None # None for all layers, or specify: ['layer_name1', 'layer_name2']
+
+ print('=' * 60)
+ print('VLM PCA VISUALIZATION')
+ print('=' * 60)
+
+ try:
+ # Create scatter plots
+ create_pca_scatter_plots(
+ target_db_path=target_db_path,
+ concept_db_path=concept_db_path,
+ layer_names=layer_names,
+ output_dir='output',
+ figsize=(12, 8),
+ alpha=0.7,
+ target_marker_size=100,
+ concept_marker_size=50
+ )
+
+ # Analyze concept separation
+ create_concept_separation_analysis(
+ target_db_path=target_db_path,
+ concept_db_path=concept_db_path,
+ layer_names=layer_names,
+ output_dir='output'
+ )
+
+ print('\nVisualization complete!')
+
+ except Exception as e:
+ print(f'Error during visualization: {e}')
+ import traceback
+ traceback.print_exc()
diff --git a/src/concepts/requirements.txt b/src/concepts/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94657cd1314278d28f55cc56a76849e4f7afec96
--- /dev/null
+++ b/src/concepts/requirements.txt
@@ -0,0 +1,5 @@
+joblib==1.5.2
+scikit-learn==1.7.1
+scipy==1.15.3
+threadpoolctl==3.6.0
+seaborn==0.13.2
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed91130f8df33eaed041b214aa3058a2e9fcf3e0
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,80 @@
+"""main.py.
+
+This module is the entry point to the VLM Lens toolkit.
+"""
+import logging
+
+from src.models.base import ModelBase
+from src.models.config import Config, ModelSelection
+
+
+def get_model(
+ model_arch: ModelSelection,
+ config: Config
+) -> ModelBase:
+ """Returns the model based on the selection enum chosen.
+
+ Args:
+ model_arch (ModelSelection): ModelSelection enum chosen for the specific architecture.
+ config (Config): The configuration object.
+
+ Returns:
+ ModelBase: A model of type ModelBase which implements the runtime
+ """
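+    # Each architecture is imported lazily inside its branch so that only the
+    # dependencies of the selected model need to be installed.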
+ if model_arch == ModelSelection.LLAVA:
+ from src.models.llava import LlavaModel
+ return LlavaModel(config)
+ elif model_arch == ModelSelection.QWEN:
+ from src.models.qwen import QwenModel
+ return QwenModel(config)
+ elif model_arch == ModelSelection.CLIP:
+ from src.models.clip import ClipModel
+ return ClipModel(config)
+ elif model_arch == ModelSelection.GLAMM:
+ from src.models.glamm import GlammModel
+ return GlammModel(config)
+ elif model_arch == ModelSelection.JANUS:
+ from src.models.janus import JanusModel
+ return JanusModel(config)
+ elif model_arch == ModelSelection.BLIP2:
+ from src.models.blip2 import Blip2Model
+ return Blip2Model(config)
+ elif model_arch == ModelSelection.MOLMO:
+ from src.models.molmo import MolmoModel
+ return MolmoModel(config)
+ elif model_arch == ModelSelection.PALIGEMMA:
+ from src.models.paligemma import PaligemmaModel
+ return PaligemmaModel(config)
+ elif model_arch == ModelSelection.INTERNLM_XC:
+ from src.models.internlm_xc import InternLMXComposerModel
+ return InternLMXComposerModel(config)
+ elif model_arch == ModelSelection.INTERNVL:
+ from src.models.internvl import InternVLModel
+ return InternVLModel(config)
+ elif model_arch == ModelSelection.MINICPM:
+ from src.models.minicpm import MiniCPMModel
+ return MiniCPMModel(config)
+ elif model_arch == ModelSelection.COGVLM:
+ from src.models.cogvlm import CogVLMModel
+ return CogVLMModel(config)
+ elif model_arch == ModelSelection.PIXTRAL:
+ from src.models.pixtral import PixtralModel
+ return PixtralModel(config)
+ elif model_arch == ModelSelection.AYA_VISION:
+ from src.models.aya_vision import AyaVisionModel
+ return AyaVisionModel(config)
+ elif model_arch == ModelSelection.PLM:
+ from src.models.plm import PlmModel
+        return PlmModel(config)
+
+    raise ValueError(f'Unsupported model architecture: {model_arch}')
+
+
+if __name__ == '__main__':
+ logging.getLogger().setLevel(logging.INFO)
+ config = Config()
+ logging.debug(
+ f'Config is set to '
+ f'{[(key, value) for key, value in config.__dict__.items()]}'
+ )
+
+ model = get_model(config.architecture, config)
+ model.run()
diff --git a/src/models/__init__.py b/src/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b8fb8241d1eca40194ef5ea589257091e6830ea
--- /dev/null
+++ b/src/models/__init__.py
@@ -0,0 +1,5 @@
+"""models package.
+
+This package provides the implementations of all supported models, along with
+the shared base implementation in base.py.
+"""
diff --git a/src/models/__pycache__/__init__.cpython-310.pyc b/src/models/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..145badff7347b43483c1026f7d6ee511e0a81ccc
Binary files /dev/null and b/src/models/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/__pycache__/base.cpython-310.pyc b/src/models/__pycache__/base.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b399b8f230633e3448fe986d4511491602ae27e1
Binary files /dev/null and b/src/models/__pycache__/base.cpython-310.pyc differ
diff --git a/src/models/__pycache__/config.cpython-310.pyc b/src/models/__pycache__/config.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..938176744e2814704575786ba44a5a744a82bfef
Binary files /dev/null and b/src/models/__pycache__/config.cpython-310.pyc differ
diff --git a/src/models/aya_vision/__init__.py b/src/models/aya_vision/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..860abc7a64ee3efbe35bf6dbd18c48bd2904b081
--- /dev/null
+++ b/src/models/aya_vision/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+AyaVision package; defines the symbols exported from this module.
+"""
+from .aya_vision import AyaVisionModel
+
+__all__ = [
+ 'AyaVisionModel'
+]
diff --git a/src/models/aya_vision/__pycache__/__init__.cpython-310.pyc b/src/models/aya_vision/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..077a4d0432a8abcb68a3f1df5ec3b63c45d8c416
Binary files /dev/null and b/src/models/aya_vision/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/aya_vision/__pycache__/aya_vision.cpython-310.pyc b/src/models/aya_vision/__pycache__/aya_vision.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e039f61a97649a822d22cf9b3c034b8f7bc7210a
Binary files /dev/null and b/src/models/aya_vision/__pycache__/aya_vision.cpython-310.pyc differ
diff --git a/src/models/aya_vision/aya_vision.py b/src/models/aya_vision/aya_vision.py
new file mode 100644
index 0000000000000000000000000000000000000000..db5994b70584d5b3e3691d6c2dbd5ff19396b27e
--- /dev/null
+++ b/src/models/aya_vision/aya_vision.py
@@ -0,0 +1,28 @@
+"""aya_vision.py.
+
+File for providing the AyaVision model implementation.
+"""
+
+from transformers import AutoModelForImageTextToText
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class AyaVisionModel(ModelBase):
+ """AyaVision model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the AyaVision model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Load the AyaVision model with proper configuration."""
+ self.model = AutoModelForImageTextToText.from_pretrained(
+ self.model_path, **getattr(self.config, 'model', {})
+ )
diff --git a/src/models/base.py b/src/models/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c7f427b14a2d7eaa3e98406b77bd474ff15dfb6
--- /dev/null
+++ b/src/models/base.py
@@ -0,0 +1,431 @@
+"""base.py.
+
+Provides the common classes used such as the ModelSelection enum as well as the
+abstract base class for models.
+"""
+import io
+import logging
+import os
+import sqlite3
+from abc import ABC, abstractmethod
+from collections.abc import Iterator
+from typing import Callable, List, Optional, TypedDict
+
+import torch
+import tqdm
+from PIL import Image
+from transformers import AutoProcessor
+from transformers.feature_extraction_utils import BatchFeature
+
+from .config import Config
+
+
+class ModelInput(TypedDict):
+ """Definition for the general model input dictionary."""
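+
+    # 'label' and 'row_id' are only populated when inputs come from a dataset;
+    # the config-driven path may omit them.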
+ image: str | Image.Image
+ prompt: str
+ label: Optional[str]
+ data: BatchFeature
+ row_id: Optional[str]
+
+
+class ModelBase(ABC):
+ """Provides an abstract base class for everything to implement."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the model base class.
+
+ Args:
+ config (Config): Parsed config.
+ """
+ self.model_path = config.model_path
+ self.config = config
+
+ # log the modules -- note that this causes an exit
+ if self.config.log_named_modules:
+ self._log_named_modules()
+ exit(0)
+
+ # load the specific model
+ logging.debug(
+ f'Loading model {self.config.architecture.value}; {self.model_path}'
+ )
+ self._load_specific_model()
+
+ # load the processor
+ self._init_processor()
+
+ def _log_named_modules(self) -> None:
+ """Logs the named modules based on the loaded model."""
+ file_path = 'logs/' + self.model_path + '.txt'
+ directory_path = os.path.dirname(file_path)
+
+        # if the file already exists, don't load the model again
+ if os.path.isfile(file_path):
+ logging.debug(f'Named modules are cached in {file_path}')
+ return
+
+ # in which case, we first load the model, then output its modules
+ self._load_specific_model()
+
+        # otherwise, log the named modules to that file, creating directories
+        # as needed
+ if not os.path.exists(directory_path):
+ os.makedirs(directory_path)
+
+ with open(file_path, 'w') as output_file:
+ output_file.writelines(
+ [f'{name}\n' for name, _ in self.model.named_modules()]
+ )
+
+ @abstractmethod
+ def _load_specific_model(self) -> None:
+ """Abstract method that loads the specific model."""
+ pass
+
+ def _init_processor(self) -> None:
+ """Initialize the self.processor by loading from the path."""
+ self.processor = AutoProcessor.from_pretrained(self.model_path)
+
+ def _generate_state_hook(self,
+ name: str,
+ model_input: ModelInput
+ ) -> Callable[[torch.nn.Module, tuple, torch.Tensor], None]:
+ """Generates the state hook depending on the embedding type.
+
+ Args:
+ name (str): The module name.
+ model_input (ModelInput): The input dictionary
+ containing the image path, prompt, label (if applicable) and
+ the data itself.
+
+ Returns:
+ hook function: The hook function to return.
+ """
+ image_path, prompt = model_input['image'], model_input['prompt']
+ label = model_input.get('label', None)
+ row_id = model_input.get('row_id', None)
+
+        # Modify image path to be an absolute path if necessary
+        if isinstance(image_path, str) and image_path != self.config.NO_IMG_PROMPT:
+            image_path = os.path.abspath(image_path)
+
+            # the resolved image path must already exist; error out if the
+            # caller did not provide a valid image path
+            assert os.path.exists(image_path)
+
+ def generate_states_hook(module: torch.nn.Module, input: tuple, output: torch.Tensor) -> None:
+ """Hook handle function that saves the embedding output to a tensor.
+
+ This tensor will be saved within a SQL database, according to the
+ connection that was initialized previously.
+
+ Args:
+                module (torch.nn.Module): The module the hook is registered on.
+ input (tuple): The input used.
+ output (torch.Tensor): The embeddings to save.
+ """
+ if not isinstance(output, torch.Tensor):
+ logging.warning(f'Output type of {str(type(module))} is not a tensor, skipped.')
+ return
+
+ cursor = self.connection.cursor()
+
+ # Convert the tensor to a binary blob
+ tensor_blob = io.BytesIO()
+
+ # It currently averages the output across the sequence length dimension, i.e., mean pooling
+ # WARNING: When contributing new models, ensure that dim 1 is always the sequence length dimension
+            if hasattr(self.config, 'pooling_method') and hasattr(output, self.config.pooling_method):
+                final_output = getattr(output, self.config.pooling_method)(dim=1)
+            else:
+                final_output = output
+ output_dim = final_output.shape[-1]
+ torch.save(final_output, tensor_blob)
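+            # The saved blob can later be restored with torch.load(io.BytesIO(blob)).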
+
+ # Insert the tensor into the table
+ cursor.execute(f"""
+ INSERT INTO {self.config.DB_TABLE_NAME}
+ (name, architecture, image_path, image_id, prompt, label, layer, pooling_method, tensor_dim, tensor)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
+ """, (
+ self.model_path,
+ self.config.architecture.value,
+ image_path if isinstance(image_path, str) else None,
+ row_id,
+ prompt,
+ label,
+ name,
+ self.config.pooling_method if hasattr(self.config, 'pooling_method') else None,
+ output_dim,
+ tensor_blob.getvalue())
+ )
+
+ self.connection.commit()
+
+ logging.debug(
+ f'Ran hook and saved tensor for {image_path} using prompt '
+ f'{prompt} on layer {name}.'
+ )
+
+ return generate_states_hook
+
+ def _register_module_hooks(self,
+ model_input: ModelInput
+ ) -> List[torch.utils.hooks.RemovableHandle]:
+ """Register the generated hook function to the modules in the config.
+
+ At the same time, we need to add in the image path itself and the prompt
+ which will be used for the database input.
+
+ Args:
+ model_input (ModelInput): The input dictionary
+ containing the image path, prompt, label (if applicable) and
+ the data itself.
+
+ Raises:
+            RuntimeError: Raised if no hooks were registered.
+
+ Returns:
+ List[torch.utils.hooks.RemovableHandle]: A list of handles that one
+ can remove after the forward pass.
+ """
+ logging.debug(
+ f'Registering module hook for {model_input["image"]} using prompt "{model_input["prompt"]}"'
+ )
+
+ # a list of hooks to remove after the forward pass
+ hooks = []
+
+ # for each module, register the state hook and save the output to database
+ for name, module in self.model.named_modules():
+ if self.config.matches_module(name):
+ hooks.append(module.register_forward_hook(
+ self._generate_state_hook(name, model_input)
+ ))
+ logging.debug(f'Registered hook to {name}')
+
+ if len(hooks) == 0:
+ raise RuntimeError(
+ 'No hooks were registered. Double-check the configured modules.'
+ )
+
+ return hooks
+
+ def _forward(self, data: BatchFeature) -> None:
+ """Given some input data, performs a single forward pass.
+
+        This function can be overridden, while _hook_and_eval
+        should be left intact.
+
+ Args:
+ data (BatchFeature): The given data tensor.
+ """
+ data.to(self.config.device)
+ with torch.no_grad():
+ _ = self.model(**data)
+ logging.debug('Completed forward pass...')
+
+ def _hook_and_eval(self, model_input: ModelInput) -> None:
+ """Given some input, performs a single forward pass.
+
+ Args:
+ model_input (ModelInput): The given input dictionary.
+ """
+ logging.debug('Starting forward pass')
+ self.model.eval()
+
+ # now set up the modules to register the hook to
+ hooks = self._register_module_hooks(model_input)
+
+ # then ensure that the data is correct
+ self._forward(model_input['data'])
+
+ for hook in hooks:
+ hook.remove()
+        logging.debug('Unregistered all hooks...')
+
+ def _initialize_db(self) -> None:
+ """Initializes a database based on config."""
+ # Connect to the database, creating it if it doesn't exist
+ self.connection = sqlite3.connect(self.config.output_db)
+ logging.debug(f'Database created at {self.config.output_db}')
+
+ cursor = self.connection.cursor()
+
+ # Create a table
+ cursor.execute(
+ f"""
+ CREATE TABLE IF NOT EXISTS {self.config.DB_TABLE_NAME} (
+ id INTEGER PRIMARY KEY,
+ name TEXT NOT NULL,
+ architecture TEXT NOT NULL,
+ timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
+ image_path TEXT NULL,
+ image_id INTEGER NULL,
+ prompt TEXT NOT NULL,
+ label TEXT NULL,
+ layer TEXT NOT NULL,
+ pooling_method TEXT NULL,
+ tensor_dim INTEGER NOT NULL,
+ tensor BLOB NOT NULL
+ );
+ """
+ )
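+        # Tensors stored here are read back by the analysis scripts under
+        # src/concepts/ (e.g. load_tensors_by_layer in pca.py).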
+
+ def _cleanup(self) -> None:
+ """Cleanups the database by closing the connection."""
+ self.connection.close()
+
+ def _generate_processor_output(self, prompt: str, img_path: str | Image.Image) -> dict:
+ """Generate the processor outputs from the prompt and image path.
+
+ Args:
+ prompt (str): The generated prompt string with the input text and
+ the image labels.
+ img_path (str | Image.Image): The specified input image path or image object.
+
+ Returns:
+ dict: The corresponding processor output per image and prompt.
+ """
+ data = {
+ 'text': prompt,
+ 'return_tensors': 'pt'
+ }
+
+ if img_path:
+ img = Image.open(img_path) if isinstance(img_path, str) else img_path
+ data['images'] = [img.convert('RGB')]
+
+ return self.processor(**data)
+
+ def _generate_prompt(self, prompt: str, add_generation_prompt: bool = True, has_images: bool = False) -> str:
+ """Generates the prompt string with the input messages.
+
+ TODO: move `add_generation_prompt` to the config.
+ [Note from Martin] I'd argue that we should keep it as a parameter here
+ since in gradio we want to hack these parameters a bit.
+
+ Args:
+ prompt (str): The input prompt string.
+ add_generation_prompt (bool): Whether to add a start token of a bot
+ response.
+ has_images (bool): Whether the model has images or not.
+
+ Returns:
+ str: The generated prompt with the input text and the image labels.
+ """
+ logging.debug('Loading data...')
+ # build the input dict for the chat template
+ input_msgs_formatted = [{
+ 'role': 'user',
+ 'content': []
+ }]
+
+ # add the image if it exists
+ if self.config.has_images() or has_images:
+ input_msgs_formatted[0]['content'].append({
+ 'type': 'image'
+ })
+
+ # add the prompt if it exists
+ if prompt:
+ input_msgs_formatted[0]['content'].append({
+ 'type': 'text',
+ 'text': prompt
+ })
+
+ # apply the chat template to get the prompt
+ return self.processor.apply_chat_template(
+ input_msgs_formatted,
+ add_generation_prompt=add_generation_prompt
+ )
+
+ def _load_input_data(self) -> Iterator[ModelInput]:
+ """From a configuration, loads the input image and text data.
+
+ For each prompt and input image, create a separate batch feature that
+ will be run separately and saved separately within the database.
+
+ Yields:
+ ModelInput: A single input item containing the image path (or image),
+ the prompt, an optional label and row id, and the processed batch
+ feature, which is either a torch.Tensor or a dictionary.
+ """
+ # by default use the processor, which may not exist for each model
+ logging.debug('Generating embeddings through its processor...')
+ if self.config.dataset:
+ # Use the dataset to load input data, which includes (id, prompt, image_path)
+ for row in self.config.dataset:
+ prompt = self._generate_prompt(row['prompt'])
+ data = self._generate_processor_output(
+ prompt=prompt,
+ img_path=row['image']
+ )
+
+ yield {
+ 'image': row['image'],
+ 'prompt': row['prompt'],
+ 'label': row['label'] if 'label' in self.config.dataset.column_names else None,
+ 'data': data,
+ 'row_id': row['id'],
+ }
+
+ else:
+ if not self.config.has_images():
+ yield {
+ 'image': self.config.NO_IMG_PROMPT, # TODO: Check this?
+ 'prompt': self.config.prompt,
+ 'data': self._generate_processor_output(
+ prompt=self._generate_prompt(),
+ img_path=None
+ )
+ }
+ else:
+ prompt = self._generate_prompt(self.config.prompt)
+ for img_path in self.config.image_paths:
+ data = self._generate_processor_output(
+ prompt=prompt,
+ img_path=img_path
+ )
+ yield {
+ 'image': img_path,
+ 'prompt': self.config.prompt,
+ 'data': data
+ }
+
+ @property
+ def _data_size(self) -> int:
+ """Returns the total number of data points.
+
+ Returns:
+ int: The total number of data points.
+ """
+ if self.config.dataset:
+ return len(self.config.dataset)
+ else:
+ if not self.config.has_images():
+ return 1
+ else:
+ return len(self.config.image_paths)
+
+ def run(self) -> None:
+ """Get the hidden states from the model and saving them."""
+ # let's first initialize a database connection
+ self._initialize_db()
+
+ # then convert to gpu
+ self.model.to(self.config.device)
+
+ # then reset the starting point in tracking maximum GPU memory, if using cuda
+ if self.config.device.type == 'cuda':
+ torch.cuda.reset_peak_memory_stats(self.config.device)
+
+ # then run everything else
+ for item in tqdm.tqdm(self._load_input_data(), desc='Running forward hooks on data', total=self._data_size):
+ self._hook_and_eval(item)
+
+ # then output peak memory usage, if using cuda
+ if self.config.device.type == 'cuda':
+ logging.debug(f'Peak GPU memory allocated: {torch.cuda.max_memory_allocated(self.config.device) / 1e6:.2f} MB')
+
+ # finally clean up, closing database connection, etc.
+ self._cleanup()
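+
+
+# Usage sketch (assumes a concrete subclass such as Blip2Model from this
+# repository and a Config parsed from cli/yaml arguments):
+#   config = Config()
+#   model = Blip2Model(config)
+#   model.run()  # runs the forward hooks and writes tensors to config.output_db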
diff --git a/src/models/blip2/__init__.py b/src/models/blip2/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..18037ced3ac0754d8a3332df0252f8701c21a8ea
--- /dev/null
+++ b/src/models/blip2/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+Blip-2 package; lists which members of this module to export.
+"""
+from .blip2 import Blip2Model
+
+__all__ = [
+ 'Blip2Model'
+]
diff --git a/src/models/blip2/blip2.py b/src/models/blip2/blip2.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae247c9bbfa1dbd337073ffe1ea1c3656ad6f8f4
--- /dev/null
+++ b/src/models/blip2/blip2.py
@@ -0,0 +1,43 @@
+"""blip2.py.
+
+File for providing the Blip2 model implementation.
+"""
+
+from transformers import Blip2ForConditionalGeneration
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class Blip2Model(ModelBase):
+ """Blip-2 model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the Blip-2 model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ self.model = Blip2ForConditionalGeneration.from_pretrained(
+ self.model_path, **self.config.model
+ ) if hasattr(self.config, 'model') else (
+ Blip2ForConditionalGeneration.from_pretrained(
+ self.model_path
+ )
+ )
+
+ def _generate_prompt(self, prompt: str) -> str:
+ """Generates the BLIP-2 model prompt which will not use the chat template.
+
+ Args:
+ prompt (str): The input prompt to be processed.
+
+ Returns:
+ str: The prompt to return, set by the config.
+ """
+ return prompt
diff --git a/src/models/clip/__init__.py b/src/models/clip/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e54cc1496077ca3dcd167c28a333f8c499abb859
--- /dev/null
+++ b/src/models/clip/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+Clip package; lists which members of this module to export.
+"""
+from .clip import ClipModel
+
+__all__ = [
+ 'ClipModel'
+]
diff --git a/src/models/clip/clip.py b/src/models/clip/clip.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a12206595f87fc12b7e9f5f0935214c17487aaf
--- /dev/null
+++ b/src/models/clip/clip.py
@@ -0,0 +1,43 @@
+"""clip.py.
+
+File for providing the Clip model implementation.
+"""
+
+from transformers import CLIPModel
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class ClipModel(ModelBase):
+ """Clip model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the clip model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ self.model = CLIPModel.from_pretrained(
+ self.model_path, **self.config.model
+ ) if hasattr(self.config, 'model') else (
+ CLIPModel.from_pretrained(
+ self.model_path
+ )
+ )
+
+ def _generate_prompt(self, prompt: str) -> str:
+ """Generates the CLIP model prompt which will not use the chat template.
+
+ Args:
+ prompt (str): The input prompt string.
+
+ Returns:
+ str: The prompt to return, set by the config.
+ """
+ return prompt
diff --git a/src/models/cogvlm/__init__.py b/src/models/cogvlm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3295df6368579d6c55719a3d8fe8d3d5dc5f726f
--- /dev/null
+++ b/src/models/cogvlm/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+CogVLM package; lists which members of this module to export.
+"""
+from .cogvlm import CogVLMModel
+
+__all__ = [
+ 'CogVLMModel'
+]
diff --git a/src/models/cogvlm/__pycache__/__init__.cpython-310.pyc b/src/models/cogvlm/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d987dfed16aff2b1a125b3d71791bb0edda32508
Binary files /dev/null and b/src/models/cogvlm/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/cogvlm/__pycache__/cogvlm.cpython-310.pyc b/src/models/cogvlm/__pycache__/cogvlm.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4b0b499d7115af88a9a602aed96dd9a648cc3cef
Binary files /dev/null and b/src/models/cogvlm/__pycache__/cogvlm.cpython-310.pyc differ
diff --git a/src/models/cogvlm/cogvlm.py b/src/models/cogvlm/cogvlm.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3eadf31acc0b8ec6d0045c0d11a6b6b1118e937
--- /dev/null
+++ b/src/models/cogvlm/cogvlm.py
@@ -0,0 +1,121 @@
+"""cogvlm.py.
+
+File for providing the CogVLM model implementation.
+"""
+import logging
+
+import torch
+from PIL import Image
+from transformers import AutoModelForCausalLM, LlamaTokenizer
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class CogVLMModel(ModelBase):
+ """CogVLM model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the CogVLM model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ self.model = AutoModelForCausalLM.from_pretrained(
+ self.model_path,
+ torch_dtype=torch.bfloat16,
+ low_cpu_mem_usage=self.config.model['low_cpu_mem_usage'],
+ trust_remote_code=self.config.model['trust_remote_code']
+ ) if hasattr(self.config, 'model') else (
+ AutoModelForCausalLM.from_pretrained(
+ self.model_path,
+ torch_dtype=torch.bfloat16,
+ low_cpu_mem_usage=True,
+ trust_remote_code=True
+ )
+ )
+
+ def _init_processor(self) -> None:
+ """Initialize the CogVLM processor.
+
+ Follows the processor setting and tokenizers under:
+ https://huggingface.co/THUDM/cogvlm-chat-hf
+ """
+ self.processor = None # no intended processor here
+ self.tokenizer = LlamaTokenizer.from_pretrained(
+ self.config.model['tokenizer_path'],
+ legacy=self.config.model['legacy']
+ )
+
+ def _generate_prompt(self, prompt: str) -> str:
+ """Generates the CogVLM model prompt which will not use the chat template.
+
+ Args:
+ prompt (str): The input prompt for the model.
+
+ Returns:
+ str: The prompt to return, set by the config.
+ """
+ return prompt
+
+ def _generate_processor_output(self, prompt: str, img_path: str | None) -> dict:
+ """Generate the processor outputs from the prompt and image path.
+
+ Args:
+ prompt (str): The generated prompt string with the input text and
+ the image labels.
+ img_path (str): The specified image path.
+
+ Returns:
+ dict: The corresponding processor output per image and prompt.
+
+ Raises:
+ ValueError: If the image path is not defined.
+ """
+ if img_path is None:
+ raise ValueError('Define input image directory in model config.')
+
+ image = Image.open(img_path).convert('RGB')
+
+ # build input data
+ input_ids = self.model.build_conversation_input_ids(
+ self.tokenizer,
+ query=prompt,
+ history=[],
+ images=[image],
+ template_version=self.config.model['template_version']
+ )
+
+ return {
+ 'input_ids': input_ids['input_ids'].unsqueeze(0),
+ 'token_type_ids': input_ids['token_type_ids'].unsqueeze(0),
+ 'attention_mask': input_ids['attention_mask'].unsqueeze(0),
+ 'images': input_ids['images'][0].to(torch.bfloat16),
+ }
+
+ def _forward(self, data: dict) -> None:
+ """Given some input data, performs a single forward pass.
+
+ This function can be overridden, while _hook_and_eval
+ should be left intact.
+
+ Args:
+ data (dict): The given data tensor.
+ """
+ gen_kwargs = self.config.forward
+
+ with torch.no_grad():
+ _ = self.model.generate(
+ input_ids=data['input_ids'].to(self.config.device),
+ token_type_ids=data['token_type_ids'].to(self.config.device),
+ attention_mask=data['attention_mask'].to(self.config.device),
+ images=[[data['images'].to(self.config.device)]],
+ **gen_kwargs
+ )
+
+ logging.debug('Completed forward pass...')
diff --git a/src/models/config.py b/src/models/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..714811e0689e8b01f16f667cc885eac3a125a7f2
--- /dev/null
+++ b/src/models/config.py
@@ -0,0 +1,358 @@
+"""config.py.
+
+This module provides a config class to be used for both the parser as well as
+for providing the model specific classes a way to access the parsed arguments.
+"""
+import argparse
+import logging
+import os
+import sys
+from enum import Enum
+from pathlib import Path
+from typing import List, Optional
+
+import regex as re
+import torch
+import yaml
+from datasets import load_dataset, load_from_disk
+
+PROJECT_ROOT = Path(__file__).resolve().parents[2] # models -> src -> root
+sys.path.append(str(PROJECT_ROOT))
+
+
+class ModelSelection(str, Enum):
+ """Enum that contains all possible model choices."""
+ LLAVA = 'llava'
+ QWEN = 'qwen'
+ CLIP = 'clip'
+ GLAMM = 'glamm'
+ JANUS = 'janus'
+ BLIP2 = 'blip2'
+ MOLMO = 'molmo'
+ PALIGEMMA = 'paligemma'
+ INTERNLM_XC = 'internlm-xcomposer'
+ INTERNVL = 'internvl'
+ MINICPM = 'minicpm'
+ COGVLM = 'cogvlm'
+ PIXTRAL = 'pixtral'
+ AYA_VISION = 'aya-vision'
+ PLM = 'plm'
+
+
+class Config:
+ """Config class for both yaml and cli arguments."""
+
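+ # Example yaml config (a sketch: keys mirror what __init__ parses below,
+ # the values are illustrative assumptions only):
+ #
+ #   architecture: llava
+ #   model_path: llava-hf/llava-1.5-7b-hf
+ #   input_dir: ./images
+ #   output_db: embeddings.db
+ #   prompt: 'Describe the image.'
+ #   modules:
+ #     - 'language_model.model.layers.10'
+ #   model:
+ #     - torch_dtype: bfloat16
+ #   forward:
+ #     - max_new_tokens: 32
+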
+ def __init__(self,
+ architecture: Optional[str] = None,
+ model_path: Optional[str] = None,
+ module: Optional[str] = None,
+ prompt: Optional[str] = None) -> None:
+ """Verifies the passed arguments while populating config fields.
+
+ Args:
+ architecture (Optional[str]): The model architecture to use.
+ model_path (Optional[str]): The specific model path to use.
+ module (Optional[str]): The specific module to extract embeddings from.
+ prompt (Optional[str]): The prompt to use for models that require it.
+
+ Raises:
+ ValueError: If any required argument is missing.
+ """
+ # Initiate parser and parse arguments
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '-c',
+ '--config',
+ type=str,
+ help='Path to the yaml config file to read run settings from'
+ )
+
+ model_sel = [model.value for model in list(ModelSelection)]
+ parser.add_argument(
+ '-a',
+ '--architecture',
+ type=ModelSelection,
+ choices=list(ModelSelection),
+ metavar=f'{model_sel}',
+ default=architecture,
+ help='The model architecture family to extract the embeddings from'
+ )
+ parser.add_argument(
+ '-m',
+ '--model-path',
+ type=str,
+ default=model_path,
+ help='The specific model path to extract the embeddings from'
+ )
+ parser.add_argument(
+ '-d',
+ '--debug',
+ default=None,
+ action='store_true',
+ help='Print out debug statements'
+ )
+ parser.add_argument(
+ '-l',
+ '--log-named-modules',
+ default=None,
+ action='store_true',
+ help='Logs the named modules for the specified model'
+ )
+ parser.add_argument(
+ '-i',
+ '--input-dir',
+ type=str,
+ help='The specified input directory to read data from'
+ )
+ parser.add_argument(
+ '-o',
+ '--output-db',
+ type=str,
+ help=(
+ 'The specified output database to save the tensors to, '
+ 'defaults to embedding.db'
+ )
+ )
+ parser.add_argument(
+ '--device',
+ type=str,
+ default='cuda',
+ choices=['cuda', 'cpu'],
+ help='Specify the device to send tensors and the model to'
+ )
+ parser.add_argument(
+ '--download-path',
+ type=str,
+ help='The path where downloaded models should be stored'
+ )
+ parser.add_argument(
+ '--pooling-method',
+ type=str,
+ default=None,
+ choices=['mean', 'max'],
+ help='The type of pooling to use for the output embeddings'
+ )
+
+ # only parse the args that we know, and throw out what we don't know
+ args = parser.parse_known_args()[0]
+
+ # the set of potential keys should be defined by the config + any
+ # other special ones here (such as the model args)
+ config_keys = list(args.__dict__.keys())
+ config_keys.append('model')
+ config_keys.append('prompt')
+ config_keys.append('modules')
+ config_keys.append('forward')
+ config_keys.append('dataset')
+
+ # first read the config file and set the current attributes to it
+ # then parse through the other arguments, as those are what we use to
+ # override the config file if supplied
+ if args.config:
+ with open(args.config, 'r') as file:
+ data = yaml.safe_load(file)
+
+ for key in config_keys:
+ if key in data.keys():
+ setattr(self, key, data[key])
+
+ # now we take all the arguments we want and we copy it over!
+ for key, value in args._get_kwargs():
+ if value is not None:
+ setattr(self, key, value)
+
+ # we set the debug flag to False if it doesn't exist
+ # And to whatever we would normally set it to otherwise
+ self.debug = (
+ hasattr(self, 'debug') and self.debug
+ )
+ if self.debug:
+ logging.getLogger().setLevel(logging.DEBUG)
+ else:
+ logging.getLogger().setLevel(logging.INFO)
+
+ # require that the architecture and the model path to exist
+ assert all(
+ hasattr(self, attr) and getattr(self, attr) is not None
+ for attr in ('architecture', 'model_path')
+ ), (
+ 'Fields `architecture` and `model_path` in yaml config must exist, '
+ 'otherwise, --architecture and --model-path must be set'
+ )
+
+ # change the architecture type to an enum
+ if not isinstance(self.architecture, ModelSelection):
+ assert self.architecture in model_sel, (
+ f'Architecture {self.architecture} not supported, '
+ f'use one of {model_sel}'
+ )
+ self.architecture = ModelSelection(self.architecture)
+
+ # if the model is set, make sure that it is a mapping
+ if hasattr(self, 'model'):
+ model_mapping = {}
+ for mapping in self.model:
+ model_mapping = {**model_mapping, **mapping}
+ self.model = model_mapping
+
+ # if forward is set, make sure that it is a mapping
+ if hasattr(self, 'forward'):
+ forward_mapping = {}
+ for mapping in self.forward:
+ forward_mapping = {**forward_mapping, **mapping}
+ self.forward = forward_mapping
+
+ # do an early return if we don't need the modules
+ self.log_named_modules = (
+ hasattr(self, 'log_named_modules') and self.log_named_modules
+ )
+ if self.log_named_modules:
+ return
+
+ # override the modules if we have a module passed in
+ if module is not None:
+ self.modules = [module]
+ assert hasattr(self, 'modules') and self.modules is not None, (
+ 'Must declare at least one module.'
+ )
+ self.set_modules(self.modules)
+
+ # make sure only one of dataset or input_dir is set
+ if hasattr(self, 'dataset') and hasattr(self, 'input_dir'):
+ raise ValueError(
+ 'Only one of `dataset` or `input_dir` can be set, '
+ 'not both. Please choose one.'
+ )
+
+ self.image_paths = []
+ if hasattr(self, 'dataset'):
+ # Make sure it is a mapping
+ ds_mapping = {}
+ for mapping in self.dataset:
+ ds_mapping = {**ds_mapping, **mapping}
+
+ dataset_path = ds_mapping.get('dataset_path', None)
+ local_dataset_path = ds_mapping.get('local_dataset_path', None)
+
+ # Check that the user uses either a local or hosted dataset (not both)
+ assert ((dataset_path and not local_dataset_path) or
+ (not dataset_path and local_dataset_path)), (
+ 'Exactly one of `dataset_path` (for hosted datasets) or '
+ '`local_dataset_path` (for local datasets) must be set.'
+ )
+
+ dataset = None
+ dataset_split = ds_mapping.get('dataset_split', None)
+ if dataset_path:
+ # Dataset is hosted
+ logging.debug(f'Loading dataset from {dataset_path} with split={dataset_split}...')
+ dataset = load_dataset(dataset_path)
+
+ elif local_dataset_path:
+ # Dataset is local
+ logging.debug(f'Loading dataset from {local_dataset_path} with split={dataset_split}...')
+ dataset = load_from_disk(local_dataset_path)
+
+ dataset = dataset[dataset_split] if dataset_split else dataset
+
+ # Load image dataset
+ img_dir = ds_mapping.get('image_dataset_path', None)
+ if img_dir:
+ logging.debug(
+ f'Locating image dataset from {img_dir}...')
+
+ # Accounts for mapping relative paths as well as filenames
+ dataset = dataset.map(
+ lambda row: {'image': os.path.join(img_dir, row['image'])})
+
+ self.image_paths = dataset['image'] # for debug purposes
+
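+ # Note: downstream loading code (ModelBase._load_input_data) expects each
+ # dataset row to expose 'id', 'prompt' and 'image' columns, plus an
+ # optional 'label' column.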
+ self.dataset = dataset
+
+ else:
+ self.dataset = None
+ self.set_image_paths(getattr(self, 'input_dir', None))
+ # override the prompt if one was passed in
+ if prompt is not None:
+ self.prompt = prompt
+ # check if there is no input data
+ if not (self.dataset or self.has_images() or hasattr(self, 'prompt')):
+ raise ValueError(
+ 'Input directory was either not provided or empty '
+ 'and no prompt was provided'
+ )
+
+ # now sets the specific device, first does a check to make sure that if
+ # the user wants to use cuda that it is available
+ if 'cuda' in self.device and not torch.cuda.is_available():
+ raise ValueError('Device set to cuda but no GPU found for this machine')
+
+ self.device = torch.device(self.device)
+ self.DB_TABLE_NAME = 'tensors'
+ self.NO_IMG_PROMPT = 'No image prompt'
+
+ # if there is no output database set, use embeddings.db as the default
+ if not hasattr(self, 'output_db'):
+ self.output_db = 'embeddings.db'
+
+ def has_images(self) -> bool:
+ """Returns a boolean for whether or not the input directory has images.
+
+ Returns:
+ bool: Whether or not the input directory has images.
+ """
+ if not self.dataset:
+ return len(self.image_paths) > 0
+ else:
+ return 'image' in self.dataset.column_names
+
+ def matches_module(self, module_name: str) -> bool:
+ """Returns whether the given module name matches one of the regexes.
+
+ Args:
+ module_name (str): The module name to match.
+
+ Returns:
+ bool: Whether the given module name matches the config's module
+ regexes.
+ """
+ for module in self.modules:
+ if module.fullmatch(module_name):
+ return True
+ return False
+
+ def set_prompt(self, prompt: str) -> None:
+ """Sets the prompt for the specific config.
+
+ Args:
+ prompt (str): Prompt to set.
+ """
+ self.prompt = prompt
+
+ def set_modules(self, to_match_modules: List[str]) -> None:
+ """Sets the modules for the specific config.
+
+ Args:
+ to_match_modules (List[str]): The module regexes to match.
+ """
+ self.modules = [re.compile(module) for module in to_match_modules]
+
+ def set_image_paths(self, input_dir: Optional[str]) -> None:
+ """Sets the images based on the input directory.
+
+ Args:
+ input_dir (Optional[str]): The input directory.
+ """
+ if input_dir is None:
+ return
+ # now we take a look through all the images in the input directory
+ # and add those paths to image_paths
+ image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']
+ self.image_paths = [
+ os.path.join(root, file_path)
+ for root, _, files in os.walk(input_dir)
+ for file_path in files
+ if os.path.splitext(file_path)[1].lower() in image_exts
+ ]
diff --git a/src/models/glamm/__init__.py b/src/models/glamm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d9c091398ddca4c5b66fb9d6575bc610833d46e
--- /dev/null
+++ b/src/models/glamm/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+Glamm package; lists which members of this module to export.
+"""
+from .glamm import GlammModel
+
+__all__ = [
+ 'GlammModel'
+]
diff --git a/src/models/glamm/__pycache__/__init__.cpython-310.pyc b/src/models/glamm/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e6bb03c04fb0064aeae103afcc62d3ca206fac18
Binary files /dev/null and b/src/models/glamm/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/glamm/__pycache__/glamm.cpython-310.pyc b/src/models/glamm/__pycache__/glamm.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3227ff4ebe19304087ccacbd3cc1e9a730bad999
Binary files /dev/null and b/src/models/glamm/__pycache__/glamm.cpython-310.pyc differ
diff --git a/src/models/glamm/glamm.py b/src/models/glamm/glamm.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9c6c70b0af318b0fb63070b19c3bc322b4e073a
--- /dev/null
+++ b/src/models/glamm/glamm.py
@@ -0,0 +1,197 @@
+"""glamm.py.
+
+File for providing model implementations for any models using AutoModel.
+"""
+
+import logging
+import os
+import sys
+
+import cv2
+import torch
+import torch.nn.functional as F
+from transformers import AutoTokenizer, CLIPImageProcessor
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+sys.path.append(os.path.join(os.path.dirname(__file__), 'groundingLMM'))
+
+from model.GLaMM import GLaMMForCausalLM # noqa: E402
+from model.llava.mm_utils import tokenizer_image_token # noqa: E402
+from model.SAM.utils.transforms import ResizeLongestSide # noqa: E402
+from tools.utils import DEFAULT_IM_END_TOKEN # noqa: E402
+from tools.utils import DEFAULT_IM_START_TOKEN # noqa: E402
+from tools.utils import DEFAULT_IMAGE_TOKEN # noqa: E402
+
+
+def grounding_enc_processor(x: torch.Tensor) -> torch.Tensor:
+ """Preprocess function.
+
+ Args:
+ x (torch.Tensor): Input tensor to preprocess.
+
+ Returns:
+ torch.Tensor: The preprocessed tensor.
+ """
+ IMG_MEAN = torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1)
+ IMG_STD = torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1)
+ IMG_SIZE = 1024
+ x = (x - IMG_MEAN) / IMG_STD
+ h, w = x.shape[-2:]
+ x = F.pad(x, (0, IMG_SIZE - w, 0, IMG_SIZE - h))
+ return x
+
+
+def prepare_model_for_inference(model: GLaMMForCausalLM, args: dict) -> GLaMMForCausalLM:
+ """Initialize vision tower.
+
+ Args:
+ model (GLaMMForCausalLM): The model to prepare.
+ args (dict): The arguments containing configuration options.
+
+ Returns:
+ GLaMMForCausalLM: The prepared model.
+ """
+ print(
+ '\033[92m' + '---- Initialized Global Image Encoder (vision tower) from: {} ----'.format(
+ args['vision_tower']
+ ) + '\033[0m'
+ )
+ model.get_model().initialize_vision_modules(model.get_model().config)
+ vision_tower = model.get_model().get_vision_tower()
+ vision_tower.to(dtype=torch.bfloat16, device=args['local_rank'])
+ model = model.bfloat16().cuda()
+ return model
+
+
+class GlammModel(ModelBase):
+ """Glamm model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the llava model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ # set up tokenizer first
+ self.tokenizer = AutoTokenizer.from_pretrained(
+ self.config.model_path,
+ model_max_length=self.config.model['model_max_length'],
+ padding_side='right',
+ use_fast=False
+ )
+ self.tokenizer.pad_token = self.tokenizer.unk_token
+ self.config.model['bbox_token_idx'] = self.tokenizer('<bbox>', add_special_tokens=False).input_ids[0]
+ self.config.model['seg_token_idx'] = self.tokenizer('[SEG]', add_special_tokens=False).input_ids[0]
+ self.config.model['bop_token_idx'] = self.tokenizer('<p>', add_special_tokens=False).input_ids[0]
+ self.config.model['eop_token_idx'] = self.tokenizer('</p>', add_special_tokens=False).input_ids[0]
+
+ model_args = {
+ 'seg_token_idx': self.config.model['seg_token_idx'],
+ 'bbox_token_idx': self.config.model['bbox_token_idx'],
+ 'eop_token_idx': self.config.model['eop_token_idx'],
+ 'bop_token_idx': self.config.model['bop_token_idx'],
+ }
+
+ self.model = GLaMMForCausalLM.from_pretrained(
+ self.config.model_path,
+ torch_dtype=torch.bfloat16,
+ low_cpu_mem_usage=True,
+ **model_args
+ )
+ self.model = prepare_model_for_inference(self.model, self.config.model)
+
+ def _init_processor(self) -> None:
+ """Set the self.processor to follow the example given.
+
+ This should follow the processor setting and tokenizers under:
+ https://github.com/mbzuai-oryx/groundingLMM/blob/main/app.py
+ """
+ processor = {
+ 'global_enc_processor': CLIPImageProcessor.from_pretrained(self.config.model['vision_tower']),
+ 'grounding_transform': ResizeLongestSide(self.config.model['image_size'])
+ }
+ self.processor = processor
+
+ def _generate_prompt(self, prompt: str) -> str:
+ """Generates the GLaMM model prompt which will not use the chat template.
+
+ Args:
+ prompt (str): The input prompt string.
+
+ Returns:
+ str: The prompt to return, set by the config.
+ """
+ prompt = f'The {DEFAULT_IMAGE_TOKEN} provides an overview of the picture.\n{prompt}'
+ if self.config.model['use_mm_start_end']:
+ replace_token = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
+ prompt = prompt.replace(DEFAULT_IMAGE_TOKEN, replace_token)
+ return prompt
+
+ def _generate_processor_output(self, prompt: str, img_path: str) -> dict:
+ """Generate the processor argument to be input into the processor.
+
+ Args:
+ prompt (str): The generated prompt string with the input text and the image labels.
+ img_path (str): The specified image path.
+
+ Returns:
+ dict: The corresponding processor arguments per image and prompt.
+
+ Raises:
+ ValueError: If the image path is not defined.
+ """
+ if img_path is None:
+ raise ValueError('GLAMM cannot have text-only generation.')
+
+ image_np = cv2.imread(img_path)
+ image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
+ orig_h, orig_w = image_np.shape[:2]
+ original_size_list = [(orig_h, orig_w)]
+
+ # Global encoder
+ global_enc_image = self.processor['global_enc_processor'].preprocess(
+ image_np, return_tensors='pt')['pixel_values'][0].unsqueeze(0).cuda().bfloat16()
+
+ # Grounding encoder
+ grounding_input = self.processor['grounding_transform'].apply_image(image_np)
+ resize_list = [grounding_input.shape[:2]]
+ grounding_enc_image = grounding_enc_processor(
+ torch.from_numpy(grounding_input).permute(2, 0, 1).contiguous()
+ ).unsqueeze(0).cuda().bfloat16()
+
+ input_ids = tokenizer_image_token(prompt, self.tokenizer, return_tensors='pt').unsqueeze(0).cuda()
+
+ return {
+ 'input_ids': input_ids,
+ 'global_enc_image': global_enc_image,
+ 'grounding_enc_image': grounding_enc_image,
+ 'resize_list': resize_list,
+ 'original_size_list': original_size_list,
+ 'bboxes': None
+ }
+
+ def _forward(self, data: dict) -> None:
+ """Given some input data, performs a single forward pass.
+
+ This function can be overridden, while _hook_and_eval should be left intact.
+
+ Args:
+ data (dict): The given data tensor.
+ """
+ with torch.no_grad():
+ output_ids, _ = self.model.evaluate(
+ data['global_enc_image'],
+ data['grounding_enc_image'],
+ data['input_ids'],
+ data['resize_list'],
+ data['original_size_list'],
+ max_tokens_new=self.config.forward['max_new_tokens'],
+ bboxes=data['bboxes']
+ )
+ logging.debug('Completed forward pass')
diff --git a/src/models/internlm_xc/__init__.py b/src/models/internlm_xc/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b77d034fe4831a0af67433cd64582b8ca220a89f
--- /dev/null
+++ b/src/models/internlm_xc/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+InternLM package; lists which members of this module to export.
+"""
+from .internlm_xc import InternLMXComposerModel
+
+__all__ = [
+ 'InternLMXComposerModel'
+]
diff --git a/src/models/internlm_xc/__pycache__/__init__.cpython-310.pyc b/src/models/internlm_xc/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b74103631a62bcc69f57007019e178550988866e
Binary files /dev/null and b/src/models/internlm_xc/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/internlm_xc/__pycache__/internlm_xc.cpython-310.pyc b/src/models/internlm_xc/__pycache__/internlm_xc.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7e7de4f488012356be5435d0c29d6f73975db649
Binary files /dev/null and b/src/models/internlm_xc/__pycache__/internlm_xc.cpython-310.pyc differ
diff --git a/src/models/internlm_xc/internlm_xc.py b/src/models/internlm_xc/internlm_xc.py
new file mode 100644
index 0000000000000000000000000000000000000000..5df015aa9c03f7b74b217bd2363d914a0bf00d9c
--- /dev/null
+++ b/src/models/internlm_xc/internlm_xc.py
@@ -0,0 +1,98 @@
+"""internlm.py.
+
+File for providing the InternLM-XComposer model implementation.
+"""
+import logging
+
+import torch
+from transformers import AutoModel, AutoProcessor
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class InternLMXComposerModel(ModelBase):
+ """InternLM model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the InternLM model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ self.model = AutoModel.from_pretrained(
+ self.model_path,
+ trust_remote_code=True,
+ **self.config.model
+ ) if hasattr(self.config, 'model') else (
+ AutoModel.from_pretrained(
+ self.model_path,
+ trust_remote_code=True
+ )
+ )
+
+ def _init_processor(self) -> None:
+ """Overridden function to instantiate the model's processor."""
+ self.processor = AutoProcessor.from_pretrained(
+ self.model_path, trust_remote_code=True)
+ self.model.tokenizer = self.processor
+
+ def _generate_prompt(self, prompt: str) -> str:
+ """Overridden function to generate the prompt for the model.
+
+ Args:
+ prompt (str): The input prompt to be processed.
+
+ Returns:
+ str: The formatted prompt ready for model input.
+ """
+ return prompt
+
+ def _generate_processor_output(self, prompt: str, img_path: str) -> dict:
+ """Overridden function to generate the format the prompt for the processor.
+
+ Args:
+ prompt (str): The input prompt to be processed.
+ img_path (str): The path to the image to be processed.
+
+ Returns:
+ dict: The formatted inputs for the processor.
+
+ Raises:
+ ValueError: If no prompt is provided when required.
+ """
+ logging.debug('Loading data...')
+
+ # Manually format input as we do not need a processor
+ inputs = {}
+
+ # Text prompts are required for this model
+ if not prompt:
+ raise ValueError(
+ 'No input prompt was provided for the InternLM-XC model')
+
+ # If there are images, load them and add image token to prompt
+ if self.config.has_images():
+ inputs['query'] = f'<ImageHere>; {prompt}'
+ inputs['image'] = [img_path]
+ else:
+ inputs['query'] = prompt
+
+ return inputs
+
+ def _forward(self, data: dict) -> None:
+ """Overridden function to run the model forward pass.
+
+ Args:
+ data (dict): The input data for the model.
+ """
+ device_type = str(self.config.device)
+ logging.debug(f'DATA: {data}')
+ with torch.autocast(device_type=device_type):
+ _, _ = self.model.chat(
+ self.processor, **data, **self.config.forward)
diff --git a/src/models/internvl/__init__.py b/src/models/internvl/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..f392ac3ef4bcbc8aaed84ec2d341d9e33e142356
--- /dev/null
+++ b/src/models/internvl/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+InternVL package; lists which members of this module to export.
+"""
+from .internvl import InternVLModel
+
+__all__ = [
+ 'InternVLModel'
+]
diff --git a/src/models/internvl/__pycache__/__init__.cpython-310.pyc b/src/models/internvl/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..db804025796f5dffdb90b3ddf472d694eff8f18e
Binary files /dev/null and b/src/models/internvl/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/internvl/__pycache__/internvl.cpython-310.pyc b/src/models/internvl/__pycache__/internvl.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..43a9f7d8806bd5d39e0badebb592e73457acebcd
Binary files /dev/null and b/src/models/internvl/__pycache__/internvl.cpython-310.pyc differ
diff --git a/src/models/internvl/__pycache__/utils.cpython-310.pyc b/src/models/internvl/__pycache__/utils.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..083673a177e765ee3b535fe1749c128f0d33a81e
Binary files /dev/null and b/src/models/internvl/__pycache__/utils.cpython-310.pyc differ
diff --git a/src/models/internvl/internvl.py b/src/models/internvl/internvl.py
new file mode 100644
index 0000000000000000000000000000000000000000..a45027dc641bbdf7891e589e8a084133b223c067
--- /dev/null
+++ b/src/models/internvl/internvl.py
@@ -0,0 +1,82 @@
+"""internvl.py.
+
+File for providing the Intern-VL model implementation.
+"""
+
+import logging
+
+import torch
+from transformers import AutoModel, AutoTokenizer
+from transformers.feature_extraction_utils import BatchFeature
+
+from src.models.base import ModelBase
+from src.models.config import Config
+from src.models.internvl.utils import load_image
+
+
+class InternVLModel(ModelBase):
+ """InternVL model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the InternVL model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ self.model = AutoModel.from_pretrained(
+ self.model_path, **self.config.model
+ ) if hasattr(self.config, 'model') else (
+ AutoModel.from_pretrained(
+ self.model_path
+ )
+ )
+
+ def _generate_prompt(self, prompt: str) -> str:
+ """Generates the InternVL model prompt which will not use the chat template.
+
+ Args:
+ prompt (str): The input prompt string.
+
+ Returns:
+ str: The prompt to return, set by the config.
+ """
+ return prompt
+
+ def _init_processor(self) -> None:
+ """Initialize the InternVL processor which need to be done manually."""
+ self.processor = None # no intended processor here
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True, use_fast=False)
+ self.img_processor = load_image
+
+ def _generate_processor_output(self, prompt: str, img_path: str) -> dict:
+ """Generate the processor outputs from the prompt and image path.
+
+ Args:
+ prompt (str): The generated prompt string with the input text and
+ the image labels.
+ img_path (str): The specified image path.
+
+ Returns:
+ dict: The corresponding processor output per image and prompt.
+ """
+ return {'prompt': prompt,
+ 'pixel_values': None if img_path is None else self.img_processor(img_path, max_num=12).to(dtype=torch.bfloat16).to(self.config.device)}
+
+ def _forward(self, data: BatchFeature) -> None:
+ """Given some input data, performs a single forward pass.
+
+ This function can be overridden, while _hook_and_eval
+ should be left intact.
+
+ Args:
+ data (BatchFeature): The given data tensor.
+ """
+ generation_config = self.config.forward
+ with torch.no_grad():
+ _ = self.model.chat(self.tokenizer, data['pixel_values'], data['prompt'], generation_config)
+ logging.debug('Completed forward pass...')
diff --git a/src/models/internvl/utils.py b/src/models/internvl/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0978a3201b10cadfc56b2e17f5ff631321a5df7c
--- /dev/null
+++ b/src/models/internvl/utils.py
@@ -0,0 +1,135 @@
+"""Helper functions from official huggingface library of InternVL."""
+
+from typing import List, Optional, Tuple
+
+import torch
+import torchvision.transforms as T
+from PIL import Image
+from torchvision.transforms.functional import InterpolationMode
+
+IMAGENET_MEAN = (0.485, 0.456, 0.406)
+IMAGENET_STD = (0.229, 0.224, 0.225)
+
+
+def build_transform(input_size: Optional[int] = 448) -> T.Compose:
+ """Helper function that transform image.
+
+ Args:
+ input_size (int, optional): The input size. Defaults to 448.
+
+ Returns:
+ T.Compose: The composed transform.
+ """
+ MEAN, STD = IMAGENET_MEAN, IMAGENET_STD
+ return T.Compose([
+ T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
+ T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
+ T.ToTensor(),
+ T.Normalize(mean=MEAN, std=STD)
+ ])
+
+
+def find_closest_aspect_ratio(
+ aspect_ratio: float, target_ratios: List[Tuple[float, float]],
+ width: int, height: int, image_size: int) -> Tuple[int, int]:
+ """Helper function that find closest aspect ratio.
+
+ Args:
+ aspect_ratio (float): The existing image aspect ratio.
+ target_ratios (list): The target aspect ratios.
+ width (int): The original image width.
+ height (int): The original image height.
+ image_size (int): The target image size.
+
+ Returns:
+ tuple: The closest aspect ratio.
+ """
+ best_ratio_diff = float('inf')
+ best_ratio = (1, 1)
+ area = width * height
+ for ratio in target_ratios:
+ target_aspect_ratio = ratio[0] / ratio[1]
+ ratio_diff = abs(aspect_ratio - target_aspect_ratio)
+ if ratio_diff < best_ratio_diff:
+ best_ratio_diff = ratio_diff
+ best_ratio = ratio
+ elif ratio_diff == best_ratio_diff:
+ if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
+ best_ratio = ratio
+ return best_ratio
+
+
+def dynamic_preprocess(
+ image: Image, min_num: Optional[int] = 1, max_num: Optional[int] = 12,
+ image_size: Optional[int] = 448, use_thumbnail: Optional[bool] = False) -> List[Image]:
+ """Helper function.
+
+ Args:
+ image (Image): The input image.
+ min_num (int, optional): The minimum number of image patches. Defaults to 1.
+ max_num (int, optional): The maximum number of image patches. Defaults to 12.
+ image_size (int, optional): The target image size. Defaults to 448.
+ use_thumbnail (bool, optional): Whether to use thumbnail. Defaults to False.
+
+ Returns:
+ list: The processed images.
+ """
+ orig_width, orig_height = image.size
+ aspect_ratio = orig_width / orig_height
+
+ # calculate the existing image aspect ratio
+ target_ratios = {
+ (i, j)
+ for n in range(min_num, max_num + 1)
+ for i in range(1, n + 1)
+ for j in range(1, n + 1)
+ if i * j <= max_num and i * j >= min_num
+ }
+ target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
+
+ # find the closest aspect ratio to the target
+ target_aspect_ratio = find_closest_aspect_ratio(
+ aspect_ratio, target_ratios, orig_width, orig_height, image_size)
+
+ # calculate the target width and height
+ target_width = image_size * target_aspect_ratio[0]
+ target_height = image_size * target_aspect_ratio[1]
+ blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
+
+ # resize the image
+ resized_img = image.resize((target_width, target_height))
+ processed_images = []
+ for i in range(blocks):
+ box = (
+ (i % (target_width // image_size)) * image_size,
+ (i // (target_width // image_size)) * image_size,
+ ((i % (target_width // image_size)) + 1) * image_size,
+ ((i // (target_width // image_size)) + 1) * image_size
+ )
+ # split the image
+ split_img = resized_img.crop(box)
+ processed_images.append(split_img)
+ assert len(processed_images) == blocks
+ if use_thumbnail and len(processed_images) != 1:
+ thumbnail_img = image.resize((image_size, image_size))
+ processed_images.append(thumbnail_img)
+ return processed_images
+
+
+def load_image(image_file: str, input_size: Optional[int] = 448, max_num: Optional[int] = 12) -> torch.Tensor:
+ """Load image to pixel values.
+
+ Args:
+ image_file (str): The image file path.
+ input_size (int, optional): The input size. Defaults to 448.
+ max_num (int, optional): The max number of image patches. Defaults to 12.
+
+ Returns:
+ torch.Tensor: The corresponding pixel values.
+ """
+ image = Image.open(image_file).convert('RGB')
+ transform = build_transform(input_size=input_size)
+ images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
+ pixel_values = [transform(image) for image in images]
+ pixel_values = torch.stack(pixel_values)
+ return pixel_values
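+
+
+# Usage sketch (illustrative filename): load_image('example.jpg', max_num=12)
+# returns a (num_patches, 3, 448, 448) float tensor that internvl.py casts to
+# bfloat16 and passes to the model's chat() call as pixel_values.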
diff --git a/src/models/janus/__init__.py b/src/models/janus/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb320a1c9f10c4aea57a1e596b9f259545fe3782
--- /dev/null
+++ b/src/models/janus/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+Janus package; lists which members of this module to export.
+"""
+from .janus import JanusModel
+
+__all__ = [
+ 'JanusModel'
+]
diff --git a/src/models/janus/__pycache__/__init__.cpython-310.pyc b/src/models/janus/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..53515ac06ead83dc2e79d631b3ceec704d1dfe89
Binary files /dev/null and b/src/models/janus/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/janus/__pycache__/janus.cpython-310.pyc b/src/models/janus/__pycache__/janus.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..57d0b08d9992d21ab6e727ff45cfa0ee8ea39190
Binary files /dev/null and b/src/models/janus/__pycache__/janus.cpython-310.pyc differ
diff --git a/src/models/janus/janus.py b/src/models/janus/janus.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2e2a7172fe3a3dc2109ddf4cdae136063916719
--- /dev/null
+++ b/src/models/janus/janus.py
@@ -0,0 +1,93 @@
+"""janus.py.
+
+File for providing the Janus model implementation.
+"""
+import torch
+from transformers import JanusForConditionalGeneration, JanusProcessor
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class JanusModel(ModelBase):
+ """Janus model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialize the Janus model.
+
+ Args:
+ config (Config): Parsed config.
+ """
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Populate self.model with the specified Janus model."""
+ # require this import to force the models script to load
+ self.model = (
+ JanusForConditionalGeneration.from_pretrained(
+ self.model_path,
+ **self.config.model
+ ) if hasattr(self.config, 'model') else
+ JanusForConditionalGeneration.from_pretrained(
+ self.model_path,
+ )
+ )
+ self.model.to(torch.bfloat16)
+
+ def _init_processor(self) -> None:
+ """Initialize the Janus processor."""
+ self.processor = JanusProcessor.from_pretrained(self.model_path)
+
+ def _generate_prompt(self, prompt: str) -> str:
+ """Generates the prompt string with the input messages.
+
+ Args:
+ prompt (str): prompt content.
+
+ Returns:
+ str: Returns the prompt content as is.
+ """
+ return prompt
+
+ def _generate_processor_output(self, prompt: str, img_path: str) -> dict:
+ """Override the base function to produce processor arguments for Janus.
+
+ Args:
+ prompt (str): The input prompt to be processed.
+ img_path (str): The path to the image to be processed.
+
+ Returns:
+ dict: The formatted inputs for the processor.
+ """
+ # build the chat messages (the prompt has already passed through _generate_prompt)
+ messages = [
+ {
+ 'role': 'user',
+ 'content': [
+ {'type': 'image', 'image': img_path},
+ {'type': 'text', 'text': prompt}
+ ]
+ }
+ ]
+
+ inputs = self.processor.apply_chat_template(
+ messages,
+ add_generation_prompt=True,
+ generation_mode='text',
+ tokenize=True,
+ return_dict=True,
+ return_tensors='pt',
+ ).to(self.config.device, dtype=torch.bfloat16)
+
+ return inputs
+
+ def _forward(self, data: dict) -> None:
+ """Given some input data, performs a single forward pass.
+
+ This function can be overridden, while _hook_and_eval
+ should be left intact.
+
+ Args:
+ data (dict): The given data tensor.
+ """
+ _ = self.model.generate(**data, **self.config.forward)
diff --git a/src/models/llava/__init__.py b/src/models/llava/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbbb07fb4e8fe3fa4429c5315103cbf551220e13
--- /dev/null
+++ b/src/models/llava/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+Llava package; lists which members of this module to export.
+"""
+from .llava import LlavaModel
+
+__all__ = [
+ 'LlavaModel'
+]
diff --git a/src/models/llava/__pycache__/__init__.cpython-310.pyc b/src/models/llava/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b17ea03ab5a6bee3ca11d2fe6bc6e70e13deae6c
Binary files /dev/null and b/src/models/llava/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/llava/__pycache__/llava.cpython-310.pyc b/src/models/llava/__pycache__/llava.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..86e63076a3b4b435bda8c95551faafcba70ab50c
Binary files /dev/null and b/src/models/llava/__pycache__/llava.cpython-310.pyc differ
diff --git a/src/models/llava/llava.py b/src/models/llava/llava.py
new file mode 100644
index 0000000000000000000000000000000000000000..effda4dafb07c7dcd36f49af5198c2e3d79bfcc5
--- /dev/null
+++ b/src/models/llava/llava.py
@@ -0,0 +1,31 @@
+"""llava.py.
+
+File for providing the Llava model implementation.
+"""
+from transformers import LlavaForConditionalGeneration
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class LlavaModel(ModelBase):
+ """Llava model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the llava model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ self.model = LlavaForConditionalGeneration.from_pretrained(
+ self.model_path, **self.config.model
+ ) if hasattr(self.config, 'model') else (
+ LlavaForConditionalGeneration.from_pretrained(
+ self.model_path
+ )
+ )
diff --git a/src/models/minicpm/__init__.py b/src/models/minicpm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bde64cfe6a8e1e841a67a6b97f61d5c5b7e05394
--- /dev/null
+++ b/src/models/minicpm/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+MiniCPM package; lists which members of this module to export.
+"""
+from .minicpm import MiniCPMModel
+
+__all__ = [
+ 'MiniCPMModel'
+]
diff --git a/src/models/minicpm/__pycache__/__init__.cpython-310.pyc b/src/models/minicpm/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..72d34dd3cfb3e80f0ad6b4e159a5ee7b697b8272
Binary files /dev/null and b/src/models/minicpm/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/minicpm/__pycache__/minicpm.cpython-310.pyc b/src/models/minicpm/__pycache__/minicpm.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7b7612896110f7f019736e310ca86080ede2d984
Binary files /dev/null and b/src/models/minicpm/__pycache__/minicpm.cpython-310.pyc differ
diff --git a/src/models/minicpm/minicpm.py b/src/models/minicpm/minicpm.py
new file mode 100644
index 0000000000000000000000000000000000000000..41aea8d53d3315c2de693335d4db160895954ca6
--- /dev/null
+++ b/src/models/minicpm/minicpm.py
@@ -0,0 +1,77 @@
+"""minicpm.py.
+
+File for providing the MiniCPM model implementation.
+"""
+
+import logging
+
+import torch
+from PIL import Image
+from transformers import AutoModel, AutoTokenizer
+from transformers.feature_extraction_utils import BatchFeature
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class MiniCPMModel(ModelBase):
+ """MiniCPM model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the MiniCPM model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ self.model = AutoModel.from_pretrained(
+ self.model_path, **getattr(self.config, 'model', {})
+ )
+
+ def _generate_prompt(self, prompt: str) -> str:
+ """Generates the MiniCPM model prompt which will not use the chat template.
+
+ Args:
+ prompt (str): The prompt content.
+
+ Returns:
+ str: The prompt to return, set by the config.
+ """
+ return prompt
+
+ def _init_processor(self) -> None:
+ """Initialize the MiniCPM tokenizer."""
+ self.processor = None # no intended processor here
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
+
+ def _generate_processor_output(self, prompt: str, img_path: str) -> dict:
+ """Generate the processor outputs from the prompt and image path.
+
+ Args:
+ prompt (str): The generated prompt string with the input text and
+ the image labels.
+ img_path (str): The specified image path.
+
+ Returns:
+ dict: The corresponding processor output per image and prompt.
+ """
+ msgs = [{'role': 'user', 'content': prompt}]
+ image = Image.open(img_path).convert('RGB')
+ return {'msgs': msgs, 'image': image}
+
+ def _forward(self, data: BatchFeature) -> None:
+ """Given some input data, performs a single forward pass.
+
+ This function can be overridden, while _hook_and_eval
+ should be left intact.
+
+ Args:
+ data (BatchFeature): The given data tensor.
+ """
+ with torch.no_grad():
+ _ = self.model.chat(**data, context=None, tokenizer=self.tokenizer, **self.config.forward)
+ logging.debug('Completed forward pass...')
diff --git a/src/models/molmo/__init__.py b/src/models/molmo/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7f568316132ef42981332c299b374145dc791ee
--- /dev/null
+++ b/src/models/molmo/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+Molmo package; lists which members of this module to export.
+"""
+from .molmo import MolmoModel
+
+__all__ = [
+ 'MolmoModel'
+]
diff --git a/src/models/molmo/__pycache__/__init__.cpython-310.pyc b/src/models/molmo/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0cb4cda185aa0603cb293a3610ba999d4dd894c3
Binary files /dev/null and b/src/models/molmo/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/molmo/__pycache__/molmo.cpython-310.pyc b/src/models/molmo/__pycache__/molmo.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4fef7096313167fed99868e1211d39d48e072a4b
Binary files /dev/null and b/src/models/molmo/__pycache__/molmo.cpython-310.pyc differ
diff --git a/src/models/molmo/molmo.py b/src/models/molmo/molmo.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae450c240f81109d8dee4407b81ae4ac77a557dc
--- /dev/null
+++ b/src/models/molmo/molmo.py
@@ -0,0 +1,101 @@
+"""molmo.py.
+
+File for providing the Molmo model implementation.
+"""
+import logging
+
+import torch
+from PIL import Image
+from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class MolmoModel(ModelBase):
+ """Molmo model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the molmo model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ self.model = AutoModelForCausalLM.from_pretrained(
+ self.model_path, **getattr(self.config, 'model', {}), trust_remote_code=True
+ )
+
+ def _init_processor(self) -> None:
+ """Initializes the processor."""
+ self.processor = AutoProcessor.from_pretrained(
+ self.config.model_path, **getattr(self.config, 'model', {}), trust_remote_code=True
+ )
+
+ def _generate_prompt(self, prompt: str, add_generation_prompt: bool = True, has_images: bool = False) -> str:
+ """Generates the Molmo model prompt which will not use the chat template.
+
+ [Note from Martin] I'd hack these parameters a bit for gradio, follow Base.
+
+ Args:
+ prompt (str): The input prompt string.
+ add_generation_prompt (bool): Whether to add a start token of a bot
+ response.
+ has_images (bool): Whether the model has images or not.
+
+ Returns:
+ str: The prompt to return, set by the config.
+ """
+ return prompt
+
+ def _generate_processor_output(self, prompt: str, img_path: str) -> dict:
+ """Generate the processor argument to be input into the processor.
+
+ Args:
+ prompt (str): The generated prompt string with the input text and
+ the image labels.
+ img_path (str): The specified image path.
+
+ Returns:
+ dict: The corresponding processor arguments per image and prompt.
+
+ Raises:
+ ValueError: If no prompt is provided when required.
+ """
+ if img_path is None:
+ raise ValueError('Molmo cannot have text-only generation.')
+
+ # prepare the data inputs according to
+ # https://huggingface.co/allenai/Molmo-7B-D-0924
+ data_inputs = self.processor.process(
+ images=[Image.open(img_path)],
+ text=prompt
+ )
+
+ # move inputs to the correct device and make a batch of size 1
+ return {
+ k: v.to(self.config.device).unsqueeze(0)
+ for k, v in data_inputs.items()
+ }
+
+ def _forward(self, data: dict) -> None:
+ """Given some input data, performs a single forward pass.
+
+ This function can be overridden, while _hook_and_eval
+ should be left intact.
+
+ Args:
+ data (dict): The given data tensor.
+ """
+ generation_config = self.config.forward
+ with torch.no_grad():
+ _ = self.model.generate_from_batch(
+ data,
+ GenerationConfig(**generation_config),
+ tokenizer=self.processor.tokenizer
+ )
+ logging.debug('Completed forward pass...')
diff --git a/src/models/paligemma/__init__.py b/src/models/paligemma/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb24b8656af6ac0e9bbde3cd1d0eb4f9c6b95419
--- /dev/null
+++ b/src/models/paligemma/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+Paligemma package; defines which names this module exports.
+"""
+from .paligemma import PaligemmaModel
+
+__all__ = [
+ 'PaligemmaModel'
+]
diff --git a/src/models/paligemma/__pycache__/__init__.cpython-310.pyc b/src/models/paligemma/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..73790d75a7f4fd1750f35e13875785777741f15c
Binary files /dev/null and b/src/models/paligemma/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/paligemma/__pycache__/paligemma.cpython-310.pyc b/src/models/paligemma/__pycache__/paligemma.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e09d0edbc20b43e8afe290163474a60ba0e34f28
Binary files /dev/null and b/src/models/paligemma/__pycache__/paligemma.cpython-310.pyc differ
diff --git a/src/models/paligemma/paligemma.py b/src/models/paligemma/paligemma.py
new file mode 100644
index 0000000000000000000000000000000000000000..15cce0f9f8664ebc92549011349cf1abfda700ca
--- /dev/null
+++ b/src/models/paligemma/paligemma.py
@@ -0,0 +1,56 @@
+"""paligemma.py.
+
+File for providing the Paligemma model implementation.
+"""
+from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class PaligemmaModel(ModelBase):
+ """PaligemmaModel model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the paligemma model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate Paligemma model.
+
+ A Hugging Face token is required to access the model.
+ Replace the placeholder in the configs/paligemma-3b.yaml file with your own Hugging Face access token.
+ Note: 'token' is a general Hugging Face Hub access token, not specific to PaliGemma.
+ It enables loading private models or authenticated access.
+ See: https://huggingface.co/docs/hub/en/security-tokens
+ """
+ self.model = PaliGemmaForConditionalGeneration.from_pretrained(
+ self.model_path, **self.config.model
+ )
+
+ def _init_processor(self) -> None:
+ """Initialize the Paligemma processor.
+
+ A Hugging Face token is required.
+ Replace the placeholder in the configs/paligemma-3b.yaml file with your own Hugging Face access token.
+ Note: 'token' is a general Hugging Face Hub access token, not specific to PaliGemma.
+ It enables loading private models or authenticated access.
+ See: https://huggingface.co/docs/hub/en/security-tokens
+ """
+ self.processor = AutoProcessor.from_pretrained(self.model_path, token=self.config.model['token'])
+
+ def _generate_prompt(self, prompt: str) -> str:
+ """Generates the Paligemma model prompt which will not use the chat template.
+
+ Args:
+ prompt (str): The input prompt for the model.
+
+ Returns:
+ str: The prompt to return, set by the config.
+ """
+ return prompt
diff --git a/src/models/pixtral/__init__.py b/src/models/pixtral/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9c3d663b263d35cfc5eb3c7f29116c9015f0dcf
--- /dev/null
+++ b/src/models/pixtral/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+Pixtral package; defines which names this module exports.
+"""
+from .pixtral import PixtralModel
+
+__all__ = [
+ 'PixtralModel'
+]
diff --git a/src/models/pixtral/__pycache__/__init__.cpython-310.pyc b/src/models/pixtral/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7c08c4b988f05b0887f5754086223643e2e599de
Binary files /dev/null and b/src/models/pixtral/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/pixtral/__pycache__/pixtral.cpython-310.pyc b/src/models/pixtral/__pycache__/pixtral.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..395dcd02837cfd9a79ab6e9a9d13687dc4ae432a
Binary files /dev/null and b/src/models/pixtral/__pycache__/pixtral.cpython-310.pyc differ
diff --git a/src/models/pixtral/pixtral.py b/src/models/pixtral/pixtral.py
new file mode 100644
index 0000000000000000000000000000000000000000..681e3ebd3cd8a81a073b5c4117bf6ecc37752cbf
--- /dev/null
+++ b/src/models/pixtral/pixtral.py
@@ -0,0 +1,103 @@
+"""pixtral.py.
+
+File for providing the Pixtral model implementation.
+"""
+import logging
+
+import torch
+from huggingface_hub import snapshot_download
+from mistral_common.protocol.instruct.messages import (ImageChunk, TextChunk,
+ UserMessage)
+from mistral_common.protocol.instruct.request import ChatCompletionRequest
+from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+from mistral_inference.transformer import Transformer
+from PIL import Image
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class PixtralModel(ModelBase):
+ """Pixtral model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the Pixtral model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ snapshot_download(
+ repo_id=self.model_path,
+ allow_patterns=['params.json', 'consolidated.safetensors', 'tekken.json'],
+ local_dir=self.config.download_path,
+ )
+
+ self.model = Transformer.from_folder(self.config.download_path, **getattr(self.config, 'model', {}))
+
+ def _generate_prompt(self, prompt: str) -> str:
+ """Generates the Pixtral model prompt which will not use the chat template.
+
+ Args:
+ prompt (str): The input prompt for the model.
+
+ Returns:
+ str: The prompt to return, set by the config.
+ """
+ return prompt
+
+ def _init_processor(self) -> None:
+ """Initialize the Pixtral Tokenizer."""
+ self.processor = None # no intended processor here
+ self.tokenizer = MistralTokenizer.from_file(f'{self.config.download_path}/tekken.json')
+
+ def _generate_processor_output(self, prompt: str, img_path: str | None) -> dict:
+ """Generate the processor outputs from the prompt and image path.
+
+ Pixtral uses a specific chat template format with special image tokens.
+
+ Args:
+ prompt (str): The generated prompt string with the input text and
+ the image labels.
+ img_path (str or None): The specified image path, or None for text-only.
+
+ Returns:
+ dict: The corresponding processor output per image and prompt.
+ """
+ user_content = [TextChunk(text=prompt)]
+ if img_path is not None:
+ image = Image.open(img_path)
+ user_content = [ImageChunk(image=image)] + user_content
+
+ completion_request = ChatCompletionRequest(messages=[UserMessage(content=user_content)])
+ encoded = self.tokenizer.encode_chat_completion(completion_request)
+
+ res = {
+ 'input_ids': torch.tensor(encoded.tokens, dtype=torch.long, device=self.model.device),
+ 'seqlens': [len(encoded.tokens)],
+ }
+
+ if img_path is not None:
+ res['images'] = [
+ torch.tensor(img, device=self.model.device, dtype=self.model.dtype)
+ for img in encoded.images
+ ]
+
+ return res
+
+ def _forward(self, data: dict) -> None:
+ """Given some input data, performs a single forward pass.
+
+ This function itself can be overridden, while _hook_and_eval
+ should be left intact.
+
+ Args:
+ data (dict): The processed input data.
+ """
+ with torch.no_grad():
+ _ = self.model.forward(**data)
+ logging.debug('Completed forward pass...')
diff --git a/src/models/plm/__init__.py b/src/models/plm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..89a2c7992adb0bfdecf304cd00b6f0e4e1168159
--- /dev/null
+++ b/src/models/plm/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+Perception Language Model (PLM) package; defines which names this module exports.
+"""
+from .plm import PlmModel
+
+__all__ = [
+ 'PlmModel'
+]
diff --git a/src/models/plm/__pycache__/__init__.cpython-310.pyc b/src/models/plm/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..64f764c808697c778275d3f07b528097d69f7db6
Binary files /dev/null and b/src/models/plm/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/plm/__pycache__/plm.cpython-310.pyc b/src/models/plm/__pycache__/plm.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dbb27b035d0454f4b49c2405afb0a501235d76ff
Binary files /dev/null and b/src/models/plm/__pycache__/plm.cpython-310.pyc differ
diff --git a/src/models/plm/plm.py b/src/models/plm/plm.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ee55f644608ce30bef0a185dfa9c10e349391b8
--- /dev/null
+++ b/src/models/plm/plm.py
@@ -0,0 +1,55 @@
+"""plm.py.
+
+File for providing the Plm model implementation.
+"""
+
+import logging
+
+import torch
+from transformers import AutoModelForImageTextToText, AutoProcessor
+from transformers.feature_extraction_utils import BatchFeature
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class PlmModel(ModelBase):
+ """PLM model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the PLM model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ # fall back to default kwargs when no `model` section is configured
+ self.model = AutoModelForImageTextToText.from_pretrained(
+ self.model_path, **getattr(self.config, 'model', {})
+ )
+ self.model.to(self.config.device)
+
+ def _init_processor(self) -> None:
+ """Initialize the self.processor by loading from the path."""
+ self.processor = AutoProcessor.from_pretrained(self.model_path, use_fast=True)
+
+ def _forward(self, data: BatchFeature) -> None:
+ """Given some input data, performs a single forward pass.
+
+ This function itself can be overridden, while _hook_and_eval
+ should be left intact.
+
+ Args:
+ data (BatchFeature): The processed input batch.
+ """
+ data.to(self.config.device)
+ with torch.no_grad():
+ _ = self.model.generate(**data, **self.config.forward)
+ logging.debug('Completed forward pass...')
diff --git a/src/models/qwen/__init__.py b/src/models/qwen/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a858b45d7427abe9198bea7ed6ffc24543252eb
--- /dev/null
+++ b/src/models/qwen/__init__.py
@@ -0,0 +1,9 @@
+"""__init__.py.
+
+Qwen package; defines which names this module exports.
+"""
+from .qwen import QwenModel
+
+__all__ = [
+ 'QwenModel'
+]
diff --git a/src/models/qwen/__pycache__/__init__.cpython-310.pyc b/src/models/qwen/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9e6de312f596394e271a61ae86be82b9e4273ffb
Binary files /dev/null and b/src/models/qwen/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/models/qwen/__pycache__/qwen.cpython-310.pyc b/src/models/qwen/__pycache__/qwen.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..025fe53bf75cd191f9a9a8d31a487299a901966e
Binary files /dev/null and b/src/models/qwen/__pycache__/qwen.cpython-310.pyc differ
diff --git a/src/models/qwen/qwen.py b/src/models/qwen/qwen.py
new file mode 100644
index 0000000000000000000000000000000000000000..11625b4561497a9eda4c6573ea4a6a67f1a8a132
--- /dev/null
+++ b/src/models/qwen/qwen.py
@@ -0,0 +1,31 @@
+"""qwen.py.
+
+File for providing the Qwen model implementation.
+"""
+from transformers import Qwen2VLForConditionalGeneration
+
+from src.models.base import ModelBase
+from src.models.config import Config
+
+
+class QwenModel(ModelBase):
+ """Qwen model implementation."""
+
+ def __init__(self, config: Config) -> None:
+ """Initialization of the qwen model.
+
+ Args:
+ config (Config): Parsed config
+ """
+ # initialize the parent class
+ super().__init__(config)
+
+ def _load_specific_model(self) -> None:
+ """Overridden function to populate self.model."""
+ # fall back to default kwargs when no `model` section is configured
+ self.model = Qwen2VLForConditionalGeneration.from_pretrained(
+ self.model_path, **getattr(self.config, 'model', {})
+ )
diff --git a/src/probe/README.md b/src/probe/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c2d729fccef69f963891083861b40450ddbba735
--- /dev/null
+++ b/src/probe/README.md
@@ -0,0 +1,77 @@
+# Probe Implementation
+Using the output database of extracted features (generated with `src/main.py`), you can easily initialize probes with the script in `src/probe/main.py`.
+
+To do this, you need a probe config `yaml` file specifying the probe `model` attributes, the `training` and `test` configurations, and the input `data`. The following is an example of a probe configuration file for features extracted using LLaVA-1.5-7B:
+```yaml
+model:
+ - activation: ReLU # a valid activation function from torch.nn
+ - hidden_size: 512 # the input and output size of the intermediate layers
+ - num_layers: 2 # the number of layers of the probe model
+ - save_dir: /path/to/save_dir # the location to save the probe results
+
+training:
+ - batch_size: [64, 128, 1024]
+ - num_epochs: [50, 100, 200]
+ - learning_rate: [0.001, 0.0005, 0.0001]
+ - optimizer: AdamW # a valid optimizer from torch.optim
+ - loss: CrossEntropyLoss # a valid loss metric from torch.nn
+
+test:
+ - batch_size: 32
+ - loss: CrossEntropyLoss # a valid loss metric from torch.nn
+
+data:
+ - input_db: /path/to/input_db
+ - db_name: tensors # the name of the table in input_db; defaults to `tensors` if unspecified
+ - input_layer: language_model.model.layers.16.post_attention_layernorm # the layer value in input_db to filter by
+
+```
+
+> For `data`, the `input_layer` value should be specified: the probe is trained on features extracted from a single layer, and mixing layers can lead to dimension mismatches or ambiguous results. If it is omitted, all stored tensors are loaded.
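+
+The following is a rough sketch of what that filtering looks like when the probe reads the feature database; it mirrors `Probe.load_data` in `src/probe/main.py`, and the database path is a placeholder:
+```python
+import io
+import sqlite3
+
+import torch
+
+db_path = '/path/to/input_db'  # placeholder
+db_name = 'tensors'
+input_layer = 'language_model.model.layers.16.post_attention_layernorm'
+
+with sqlite3.connect(db_path) as connection:
+    rows = connection.execute(f'SELECT layer, tensor, label FROM {db_name}').fetchall()
+
+# keep only the probed layer and deserialize the stored tensor bytes
+features = [
+    torch.load(io.BytesIO(tensor_bytes), map_location='cpu')
+    for layer, tensor_bytes, label in rows
+    if layer == input_layer
+]
+```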
+
+## Training
+Note that the training procedure conducts a naive grid search over every combination of the `training` hyperparameters `batch_size`, `num_epochs` and `learning_rate`. Each combination is evaluated with $k$-fold cross-validation (default $k=5$), its mean validation loss across folds is recorded, and the combination with the lowest validation loss is selected; a sketch of the grid expansion follows the example below.
+
+```yaml
+training:
+ - batch_size: [64, 128, 1024]
+ - num_epochs: [50, 100, 200]
+ - learning_rate: [0.001, 0.0005, 0.0001]
+ ...
+```
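+
+As a rough sketch, `src/probe/main.py` expands these lists into a full grid with `itertools.product`; the values below are the ones from the example config, copied here so the snippet runs on its own:
+```python
+import itertools
+
+training = {
+    'batch_size': [64, 128, 1024],
+    'num_epochs': [50, 100, 200],
+    'learning_rate': [0.001, 0.0005, 0.0001],
+    'optimizer': 'AdamW',
+    'loss': 'CrossEntropyLoss',
+}
+
+train_keys = list(training.keys())
+train_configs = list(itertools.product(*[
+    value if isinstance(value, list) else [value]
+    for value in (training[k] for k in train_keys)
+]))
+
+# 3 * 3 * 3 * 1 * 1 = 27 candidate configurations
+candidates = [dict(zip(train_keys, combo)) for combo in train_configs]
+```
+Each candidate is then passed to `Probe.cross_validate`, and the one with the lowest mean validation loss is used for the final training run.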
+
+## Testing
+After retrieving the best training configuration, the current script trains and tests a probe model on two versions of the data: a **Main** condition (where the data is untouched) and a **Shuffled** condition (where the labels are shuffled).
+
+This is based on work from [Hewitt and Liang (2019)](https://aclanthology.org/D19-1275/), where they propose the inclusion of "control tasks" to ensure that the probe is selective in its learning. The goal is to achieve high task-specific accuracy and low control task accuracy. The output attributes described in [Results](#results) follow this experimental design.
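+
+The shuffled condition is produced by `Probe.load_data(shuffle=True)`, which simply breaks the feature-label correspondence before training; conceptually it amounts to the following (the lists here are illustrative placeholders):
+```python
+import random
+
+features = [[0.1, 0.9], [0.8, 0.2], [0.4, 0.6]]  # placeholder feature vectors
+targets = [0, 1, 0]                              # placeholder labels
+
+random.shuffle(targets)  # control task: labels no longer correspond to their features
+```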
+
+## Results
+Finally, the program saves both the probe model and the result values in `save_dir`. The current script writes the attributes to `{save_dir}/probe_data.json`. The following is an example output:
+```python
+{
+ # The final train configuration with the lowest validation loss
+ "train_config": {
+ "batch_size": 64,
+ "num_epochs": 200,
+ "learning_rate": 0.0001,
+ "optimizer": "AdamW",
+ "loss": "CrossEntropyLoss"
+ },
+
+ # Probe accuracy on the shuffled data
+ "shuffle_accuracy": 0.3295668661594391,
+ "shuffle_loss": 2.1123009968163153,
+ "shuffle_preds": [1, 2, 0, ... ],
+ "shuffle_labels": [0, 0, 0, ... ],
+
+ # Probe accuracy on the original data
+ "test_accuracy": 0.6436911225318909,
+ "test_loss": 1.7786695135290789,
+ "test_preds": [2, 0, 2, ...],
+ "test_labels": [0, 0, 2, ...],
+
+ # The statistical significance of the difference between the shuffled and unshuffled data using a z-test
+ "pvalue": 4.8257723322914694e-116
+}
+```
+Details on the rationale for shuffling and the experimental design can be found in the [Testing](#testing) section.
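+
+To reuse a saved probe, note that `probe.pth` is a plain `state_dict`, so it can be reloaded onto an architecture rebuilt with the same `model` settings. A minimal sketch, assuming the default two-layer probe with `hidden_size: 512` from the example config (the sizes and path below are placeholders):
+```python
+import torch
+import torch.nn as nn
+
+input_size = 4096   # hidden dimension of the probed layer (placeholder)
+num_classes = 3     # number of labels in the dataset (placeholder)
+
+# mirrors Probe.build_model for num_layers = 2
+probe = nn.Sequential(
+    nn.Linear(input_size, 512),
+    nn.ReLU(),
+    nn.Linear(512, num_classes),
+)
+probe.load_state_dict(torch.load('/path/to/save_dir/probe.pth', map_location='cpu'))
+probe.eval()
+```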
diff --git a/src/probe/main.py b/src/probe/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca735c73ee70d26b8637bc3cb2b57337da28721e
--- /dev/null
+++ b/src/probe/main.py
@@ -0,0 +1,520 @@
+"""Probe classes for information analysis in models.
+
+Example command: python -m src.probe.main -c configs/probe/qwen/clevr-boolean-l13-example.yaml
+"""
+
+import argparse
+import io
+import itertools
+import json
+import logging
+import os
+import random
+import sqlite3
+from typing import Optional
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import yaml
+from sklearn.model_selection import KFold, train_test_split
+from statsmodels.stats.proportion import proportions_ztest
+from torch.utils.data import DataLoader, Dataset, Subset, TensorDataset
+
+
+class ProbeConfig:
+ """Configuration class for the probe."""
+
+ def __init__(self) -> None:
+ """Initialize the configuration.
+
+ Raises:
+ ValueError: If a CUDA device is requested but none is available.
+ """
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '-c', '--config', type=str, help='Path to the probe configuration file'
+ )
+
+ parser.add_argument(
+ '--debug',
+ default=False,
+ action='store_true',
+ help='Flag to print out debug statements',
+ )
+
+ parser.add_argument(
+ '-d',
+ '--device',
+ type=str,
+ default='cuda' if torch.cuda.is_available() else 'cpu',
+ help='The device to send the model and tensors to',
+ )
+
+ args = parser.parse_args()
+
+ assert args.config is not None, 'Config file must be provided.'
+ with open(args.config, 'r') as file:
+ data = yaml.safe_load(file)
+ for key in data.keys():
+ setattr(self, key, data[key])
+
+ # Set debug mode based on config
+ logging.getLogger().setLevel(logging.DEBUG if args.debug else logging.INFO)
+
+ # Load model device
+ if 'cuda' in args.device and not torch.cuda.is_available():
+ raise ValueError('No GPU found on this machine')
+
+ self.device = args.device
+ logging.debug(self.device)
+
+ # Load data mapping
+ assert (
+ hasattr(self, 'data')
+ ), 'The `data` field must be specified in the config, with an input database path.'
+
+ data_mapping = {}
+ for mapping in self.data:
+ data_mapping = {**data_mapping, **mapping}
+
+ # Check whether a specific layer is specified for the database
+ data_mapping.setdefault('input_layer', None)
+
+ # Set default database name if not specified
+ if 'db_name' not in data_mapping:
+ logging.debug(
+ 'Input database name attribute `db_name` not specified, setting to default `tensors`.')
+ data_mapping.setdefault('db_name', 'tensors')
+ self.data = data_mapping
+
+ # Load model mapping
+ model_mapping = {}
+ if hasattr(self, 'model'):
+ for mapping in self.model:
+ model_mapping = {**model_mapping, **mapping}
+
+ # Set default model config if not provided
+ # input_size and output_size will be set when the data is loaded
+ model_mapping.update({k: v for k, v in {
+ 'activation': 'ReLU',
+ 'hidden_size': 256,
+ 'num_layers': 2,
+ }.items() if k not in model_mapping})
+ logging.debug(model_mapping)
+ self.model = model_mapping
+
+ # Load training mapping
+ train_mapping = {}
+ if hasattr(self, 'training'):
+ for mapping in self.training:
+ train_mapping = {**train_mapping, **mapping}
+
+ logging.debug(train_mapping)
+ # Set default training config if not provided
+ train_mapping.update({k: v for k, v in {
+ 'optimizer': 'AdamW',
+ 'learning_rate': 1e-3,
+ 'loss': 'CrossEntropyLoss',
+ 'num_epochs': 10,
+ 'batch_size': 32
+ }.items() if k not in train_mapping})
+
+ self.training = train_mapping
+
+ # Load test mapping
+ test_mapping = {}
+ if hasattr(self, 'test'):
+ for mapping in self.test:
+ test_mapping = {**test_mapping, **mapping}
+
+ # Set default test config if not provided
+ test_mapping.update({k: v for k, v in {
+ 'optimizer': 'AdamW',
+ 'learning_rate': 1e-3,
+ 'loss': 'CrossEntropyLoss',
+ 'num_epochs': 10,
+ 'batch_size': 32
+ }.items() if k not in test_mapping})
+
+ self.test = test_mapping
+
+
+class Probe(nn.Module):
+ """Probe class for extracting information from models."""
+
+ def __init__(self, config: ProbeConfig) -> None:
+ """Initialize the probe with the given configuration.
+
+ Args:
+ config (ProbeConfig): The parsed probe configuration.
+ """
+ super(Probe, self).__init__()
+ self.config = config
+
+ # Load input data to parse model input_size and output_size
+ self.data = self.load_data()
+
+ # Initialize the model
+ self.build_model()
+
+ def build_model(self) -> None:
+ """Builds the probe model from scratch."""
+ # Initialize the probe model
+ layers = list()
+ layers.append(
+ nn.Linear(self.config.model['input_size'],
+ self.config.model['hidden_size'])
+ )
+ layers.append(getattr(nn, self.config.model['activation'])())
+
+ # Initialize intermediate layers based on config
+ for _ in range(self.config.model['num_layers'] - 2):
+ layers.append(
+ nn.Linear(self.config.model['hidden_size'],
+ self.config.model['hidden_size'])
+ )
+ layers.append(getattr(nn, self.config.model['activation'])())
+
+ # Final layer to output the desired size
+ layers.append(
+ nn.Linear(self.config.model['hidden_size'],
+ self.config.model['output_size'])
+ )
+
+ # Combine all layers to construct the model
+ self.model = nn.Sequential(*layers)
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Forward pass of the probe model.
+
+ Args:
+ x (torch.Tensor): Input tensor.
+
+ Returns:
+ torch.Tensor: Output tensor.
+ """
+ logging.debug('Forward pass with input: %s', x.shape)
+ return self.model(x)
+
+ def load_data(self, shuffle: bool = False) -> TensorDataset:
+ """Load tensors from the database.
+
+ Args:
+ shuffle (bool): Whether to shuffle the data.
+
+ Returns:
+ TensorDataset: A dataset containing the loaded tensors.
+ """
+ logging.debug('Loading tensors from the database...')
+ # Connect to database
+ connection = sqlite3.connect(self.config.data['input_db'])
+ cursor = connection.cursor()
+
+ # Build query and fetch results
+ cursor.execute(
+ f"SELECT layer, tensor, label FROM {self.config.data['db_name']}"
+ )
+ results = cursor.fetchall()
+
+ # Close the connection
+ connection.close()
+
+ # Gather unique class labels
+ all_labels = set([result[2] for result in results])
+ self.config.model.setdefault('output_size', len(all_labels))
+ assert (
+ 'output_size' in self.config.model and len(
+ all_labels) == self.config.model['output_size']
+ ), 'Input attribute `output_size` does not match number of classes in dataset. Leave blank to assign automatically.'
+
+ # Label to index mapping
+ label_to_idx = {label: i for i, label in enumerate(all_labels)}
+
+ features, targets = [], []
+ probe_layer = self.config.data.get('input_layer', None)
+ if not probe_layer:
+ logging.debug(
+ 'No `input_layer` attribute provided for database loading, extracting all tensors...')
+
+ input_size = self.config.data.get('input_size', None)
+ for layer, tensor_bytes, label in results:
+ if (probe_layer and layer == probe_layer) or (not probe_layer):
+ tensor = torch.load(io.BytesIO(tensor_bytes),
+ map_location=self.config.device)
+ if tensor.ndim > 2:
+ # Apply mean pooling if tensor is not already pooled
+ tensor = tensor.mean(dim=1)
+ # Squeeze to shape (hidden_dim)
+ tensor = tensor.squeeze()
+
+ if not input_size:
+ # Set model config input_size once
+ input_size = tensor.shape[0] # pooled tensor
+ self.config.model.setdefault('input_size', input_size)
+ assert (
+ 'input_size' in self.config.model and input_size == self.config.model[
+ 'input_size']
+ ), 'Input attribute `input_size` does not match input tensor dimension. Leave blank to assign automatically.'
+
+ features.append(tensor)
+ targets.append(label_to_idx[label])
+
+ if shuffle:
+ random.shuffle(targets)
+
+ # Stack lists into batched tensors
+ X = torch.stack(features)
+ Y = torch.tensor(targets)
+ logging.debug(f'Features shape {X.shape}, Targets shape {Y.shape}')
+
+ # Move tensors to same device as model
+ X, Y = X.to(self.config.device), Y.to(self.config.device)
+
+ return TensorDataset(X, Y)
+
+ def cross_validate(self, config: dict, data: Dataset, nfolds: Optional[int] = 5) -> float:
+ """Trains the model using the config hyperparameters across k folds.
+
+ Args:
+ config (dict): The configuration dictionary.
+ data (Dataset): The dataset to train on.
+ nfolds (Optional[int]): The number of folds for cross-validation.
+
+ Returns:
+ float: The average validation loss across all folds.
+ """
+ kf = KFold(n_splits=nfolds, shuffle=True, random_state=42)
+ val_losses = []
+ for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(data)))):
+ logging.debug(f'=== Starting fold {fold + 1}/{nfolds} ===')
+ train_set, val_set = Subset(data, train_idx), Subset(data, val_idx)
+
+ # Reinitialize model after each fold to prevent contamination
+ self.build_model()
+
+ result = self.train(config, train_set, val_set)
+ val_losses.append(result['val_loss'] * len(val_set))
+
+ # Return the mean validation loss across all folds
+ return sum(val_losses) / len(data)
+
+ def train(self, train_config: dict, train_set: Dataset, val_set: Optional[Dataset] = None) -> dict:
+ """Train the probe model.
+
+ Args:
+ train_config (dict): The training configuration.
+ train_set (Dataset): The training dataset.
+ val_set (Dataset, optional): The validation dataset.
+
+ Returns:
+ dict: The training results, including validation loss and accuracy.
+ """
+ logging.debug(
+ f'Training the probe model with config {train_config}...')
+
+ # Set the device
+ device = torch.device(self.config.device)
+ self.model.to(device)
+
+ # Initialize the optimizer
+ optimizer_class = getattr(optim, train_config['optimizer'])
+ optimizer = optimizer_class(
+ self.parameters(), lr=train_config['learning_rate'])
+
+ # Initialize the loss function
+ loss_fn = getattr(nn, train_config['loss'])()
+ train_loader = DataLoader(
+ train_set, batch_size=train_config['batch_size'], shuffle=True)
+
+ for epoch in range(train_config['num_epochs']):
+ # Set the model to training mode
+ self.model.train()
+ total_loss = 0
+ for X, Y in train_loader:
+ optimizer.zero_grad()
+
+ outputs = self.model(X.float())
+ loss = loss_fn(outputs, Y)
+
+ loss.backward()
+ optimizer.step()
+
+ total_loss += loss.item() * X.size(0)
+
+ mean_train_loss = total_loss / len(train_set)
+ logging.debug(
+ f"--Epoch {epoch + 1}/{train_config['num_epochs']}: Train loss: {mean_train_loss:.4f}")
+
+ if val_set:
+ val_loader = DataLoader(
+ val_set, batch_size=train_config['batch_size'])
+ # Set model to eval mode and calculate validation loss
+ self.model.eval()
+ val_loss = 0
+ preds, labels = [], []
+ with torch.no_grad():
+ for X_val, Y_val in val_loader:
+ outputs = self.model(X_val.float())
+ loss = loss_fn(outputs, Y_val)
+ val_loss += loss.item() * X_val.size(0)
+
+ preds.append(outputs)
+ labels.append(Y_val)
+
+ preds = torch.cat(preds, dim=0)
+ labels = torch.cat(labels, dim=0)
+
+ val_loss = val_loss / len(val_set)
+ val_acc = (preds.argmax(dim=1) == labels).float().mean().item()
+ logging.debug(
+ f'Validation accuracy: {val_acc}, Validation mean loss: {val_loss}')
+
+ return {'preds': preds, 'labels': labels, 'val_loss': val_loss, 'val_acc': val_acc}
+
+ # TODO: Return train details here
+ return {}
+
+ def evaluate(self, test_set: Dataset) -> dict:
+ """Evaluate the probe model on the input test set.
+
+ Args:
+ test_set (Dataset): The test dataset.
+
+ Returns:
+ dict: The evaluation results, including loss and accuracy.
+ """
+ self.model.eval()
+
+ device = torch.device(self.config.device)
+ self.model.to(device)
+
+ test_config = self.config.test
+ test_loader = DataLoader(
+ test_set, batch_size=test_config['batch_size'])
+
+ loss_fn = getattr(nn, test_config['loss'])()
+ total_loss = 0.0
+ num_correct, num_samples = 0, 0
+ all_preds, all_labels = [], []
+ with torch.no_grad():
+ for X, Y in test_loader:
+ outputs = self.model(X.float())
+ loss = loss_fn(outputs, Y)
+ total_loss += loss.item() * X.size(0) # to account for incomplete batches
+
+ preds = outputs.argmax(dim=1)
+ num_correct += (preds == Y).sum()
+ num_samples += Y.size(0)
+
+ all_preds.append(preds)
+ all_labels.append(Y)
+
+ mean_loss = float(total_loss / len(test_set))
+ accuracy = float(num_correct / num_samples)
+
+ all_preds = torch.cat(all_preds, dim=0).cpu().numpy()
+ all_labels = torch.cat(all_labels, dim=0).cpu().numpy()
+ logging.debug(
+ f'Test accuracy: {accuracy}, Test mean loss: {mean_loss}')
+ return {'accuracy': accuracy,
+ 'loss': mean_loss,
+ 'labels': all_labels,
+ 'preds': all_preds}
+
+ def save_model(self, metadata: Optional[dict] = None) -> None:
+ """Saves the trained model to a user-specified path.
+
+ Args:
+ metadata (Optional[dict]): Metadata to save alongside the model.
+ """
+ save_dir = self.config.model.get('save_dir') or 'probe_output'
+ os.makedirs(save_dir, exist_ok=True)
+
+ save_path = os.path.join(save_dir, 'probe.pth')
+ try:
+ torch.save(self.model.state_dict(), save_path)
+ logging.debug(f'Model saved to {save_path}')
+ except Exception as e:
+ logging.error(f'Failed to save probe model: {e}')
+
+ if metadata:
+ try:
+ data_path = os.path.join(save_dir, 'probe_data.json')
+ with open(data_path, 'w') as f:
+ f.write(json.dumps(metadata, indent=2))
+ logging.debug(f'Probe metadata saved to {data_path}')
+ except Exception as e:
+ logging.error(f'Failed to save metadata: {e}')
+
+
+def main() -> None:
+ """Main function to run the probe."""
+ config = ProbeConfig()
+ probe = Probe(config)
+
+ # Load data and split into train/val and test
+ data = probe.data
+ indices = list(range(len(data)))
+
+ train_idx, test_idx = train_test_split(
+ indices, test_size=0.2, random_state=42)
+ train_set, test_set = Subset(data, train_idx), Subset(data, test_idx)
+
+ # Load all combinations of hyperparameters
+ train_keys = list(config.training.keys())
+ train_configs = list(itertools.product(
+ *[[config.training[k]] if not isinstance(config.training[k], list) else config.training[k] for k in train_keys]))
+ logging.debug(
+ f'Hyperparameter tuning using {len(train_configs)} config combinations...')
+
+ # Train using k-fold cross validation on all configs and store the lowest validation losses
+ val_losses = []
+ for combo in train_configs:
+ val_loss = probe.cross_validate(
+ dict(zip(train_keys, combo)), train_set)
+ val_losses.append(val_loss)
+
+ # Finally, train the model on the whole train_set using best config
+ min_idx = val_losses.index(min(val_losses))
+ final_config = dict(zip(train_keys, train_configs[min_idx]))
+ logging.debug(
+ f'Model config results after hyperparameter tuning: {final_config}')
+
+ # Shuffle the labels and train the model again to establish the control-task baseline
+ shffl_data = probe.load_data(shuffle=True)
+ shuffl_train, shuffl_test = Subset(
+ shffl_data, train_idx), Subset(shffl_data, test_idx)
+
+ probe.build_model()
+ probe.train(final_config, shuffl_train)
+ shffl_results = probe.evaluate(shuffl_test)
+
+ # Reinitialize model to finally train with best config
+ probe.build_model()
+ probe.train(final_config, train_set)
+ test_results = probe.evaluate(test_set)
+
+ # Calculate p-value using proportions z-test
+ shffl_correct = (shffl_results['preds'] == shffl_results['labels']).sum()
+ test_correct = (test_results['preds'] == test_results['labels']).sum()
+ pvalue = proportions_ztest([test_correct, shffl_correct],
+ [len(test_results['preds']), len(shffl_results['preds'])])[1]
+
+ # Save the non-shuffled model and its results to file
+ probe.save_model({'train_config': final_config,
+ 'shuffle_accuracy': shffl_results['accuracy'],
+ 'shuffle_loss': shffl_results['loss'],
+ 'shuffle_preds': shffl_results['preds'].tolist(),
+ 'shuffle_labels': shffl_results['labels'].tolist(),
+ 'test_accuracy': test_results['accuracy'],
+ 'test_loss': test_results['loss'],
+ 'test_preds': test_results['preds'].tolist(),
+ 'test_labels': test_results['labels'].tolist(),
+ 'pvalue': pvalue})
+
+ # TODO: implement a demo
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c173deb29795f605f72c2673c9687734dab11875
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,43 @@
+"""Utility functions for interacting with the SQLite database."""
+import io
+import logging
+import sqlite3
+from typing import Any, List, Optional
+
+import torch
+
+
+def select_tensors(
+ db_path: str,
+ table_name: str,
+ keys: List[str] = ['layer', 'pooling_method', 'tensor_dim', 'tensor'],
+ sql_where: Optional[str] = None,
+ ) -> List[Any]:
+ """Select and return all tensors from the specified SQLite database and table.
+
+ Args:
+ db_path (str): Path to the SQLite database file.
+ table_name (str): Name of the table to query.
+ keys (List[str]): List of keys to select from the database.
+ sql_where (str): Optional SQL WHERE clause to filter results.
+
+ Returns:
+ List[Any]: A list of row dictionaries, each mapping the selected keys to
+ their values, with the `tensor` entry deserialized into a torch.Tensor.
+ """
+ if 'tensor' not in keys:
+ logging.warning("'tensor' key should be included to retrieve tensors; automatically adding it.")
+ keys = keys + ['tensor'] # avoid mutating the caller's (or default) list
+ final_results = []
+ with sqlite3.connect(db_path) as connection:
+ cursor = connection.cursor()
+ query = f'SELECT {", ".join(keys)} FROM {table_name}'
+ if sql_where:
+ assert sql_where.strip().lower().startswith('where'), "sql_where should start with 'WHERE'"
+ query += f' {sql_where}'
+ cursor.execute(query)
+ results = cursor.fetchall()
+ for row in results:
+ result_item = {key: value for key, value in zip(keys, row)}
+ result_item['tensor'] = torch.load(io.BytesIO(result_item['tensor']), map_location='cpu')
+ final_results.append(result_item)
+ return final_results
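+
+
+if __name__ == '__main__':
+    # Minimal usage sketch; the database path and WHERE clause are placeholders,
+    # not files shipped with the repository.
+    rows = select_tensors(
+        db_path='/path/to/output.db',
+        table_name='tensors',
+        keys=['layer', 'tensor'],
+        sql_where="WHERE layer = 'language_model.model.layers.16.post_attention_layernorm'",
+    )
+    for row in rows:
+        print(row['layer'], tuple(row['tensor'].shape))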