Zaynes committed · verified
Commit 2203975 · 1 Parent(s): 446c818

Upload folder using huggingface_hub

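The commit message indicates the folder was pushed with `huggingface_hub`. A minimal sketch of that kind of upload is shown below; the local folder path is an assumption, and the repo id is the one that appears in the training logs later in this commit.

```python
# Hedged sketch of a folder upload with huggingface_hub.
# folder_path is an assumed local export directory; repo_id comes from the training logs below.
from huggingface_hub import HfApi

api = HfApi()  # reads the token from HF_TOKEN or the cached `huggingface-cli login`
api.upload_folder(
    folder_path="experiments/testing__pvv2_lora/merged",
    repo_id="TAUR-dev/testing__pvv2_lora",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```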
.gitattributes CHANGED
@@ -1,35 +1,8 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ # Mark all log files as text to prevent binary file issues
2
+ *.log text
3
+ *.txt text
4
+ *.out text
5
+ *.err text
6
+ training_artifacts/logs/* text
7
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
8
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
Modelfile ADDED
@@ -0,0 +1,16 @@
1
+ # ollama modelfile auto-generated by llamafactory
2
+
3
+ FROM .
4
+
5
+ TEMPLATE """{{ if .System }}<|im_start|>system
6
+ {{ .System }}<|im_end|>
7
+ {{ end }}{{ range .Messages }}{{ if eq .Role "user" }}<|im_start|>user
8
+ {{ .Content }}<|im_end|>
9
+ <|im_start|>assistant
10
+ {{ else if eq .Role "assistant" }}{{ .Content }}<|im_end|>
11
+ {{ end }}{{ end }}"""
12
+
13
+ SYSTEM """You are Qwen, created by Alibaba Cloud. You are a helpful assistant."""
14
+
15
+ PARAMETER stop "<|im_end|>"
16
+ PARAMETER num_ctx 4096
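The Modelfile above is the Ollama wrapper LlamaFactory auto-generates for the exported weights: it points `FROM .` at the local directory, reuses the Qwen ChatML template, and caps the context at 4096 tokens. A hedged sketch of registering and querying it through the Ollama CLI follows; the model name `testing-pvv2-lora` is an arbitrary choice, not part of this commit.

```python
# Hedged sketch: build an Ollama model from this Modelfile and send it one prompt.
# "ollama create -f" and "ollama run" are standard CLI commands; the model name is made up.
import subprocess

subprocess.run(["ollama", "create", "testing-pvv2-lora", "-f", "Modelfile"], check=True)
result = subprocess.run(
    ["ollama", "run", "testing-pvv2-lora", "Briefly introduce yourself."],
    capture_output=True, text=True, check=True,
)
print(result.stdout)
```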
added_tokens.json ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
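added_tokens.json pins the IDs of the Qwen2 special tokens (ChatML markers, tool-call tags, vision placeholders, FIM tokens). A quick sanity check that the published tokenizer resolves them to the same IDs, assuming this repository id:

```python
# Sketch: confirm a few special-token IDs match added_tokens.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("TAUR-dev/testing__pvv2_lora")
assert tok.convert_tokens_to_ids("<|endoftext|>") == 151643
assert tok.convert_tokens_to_ids("<|im_start|>") == 151644
assert tok.convert_tokens_to_ids("<|im_end|>") == 151645
```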
chat_template.jinja ADDED
@@ -0,0 +1,54 @@
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
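The template above is the stock Qwen2.5 chat template: it injects a default system prompt, wraps every turn in `<|im_start|>`/`<|im_end|>`, and serializes tool calls as `<tool_call>` JSON blocks. Rather than formatting prompts by hand, the usual route is `apply_chat_template`, sketched here with an assumed repository id:

```python
# Sketch: render a conversation through the shipped chat template.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("TAUR-dev/testing__pvv2_lora")
messages = [
    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
    {"role": "user", "content": "Summarize LoRA in one sentence."},
]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)  # ends with "<|im_start|>assistant\n" because add_generation_prompt=True
```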
config.json ADDED
@@ -0,0 +1,58 @@
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 1536,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 8960,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention"
42
+ ],
43
+ "max_position_embeddings": 32768,
44
+ "max_window_layers": 21,
45
+ "model_type": "qwen2",
46
+ "num_attention_heads": 12,
47
+ "num_hidden_layers": 28,
48
+ "num_key_value_heads": 2,
49
+ "rms_norm_eps": 1e-06,
50
+ "rope_scaling": null,
51
+ "rope_theta": 1000000.0,
52
+ "sliding_window": null,
53
+ "tie_word_embeddings": true,
54
+ "transformers_version": "4.57.1",
55
+ "use_cache": true,
56
+ "use_sliding_window": false,
57
+ "vocab_size": 151936
58
+ }
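config.json describes an unmodified Qwen2.5-1.5B-Instruct architecture: 28 full-attention layers, hidden size 1536 with 12 query heads and 2 KV heads (grouped-query attention, head dim 128), tied input/output embeddings, and a 32768-token position limit. A sketch of loading it, assuming this repository id:

```python
# Sketch: load the config/model and read off a few derived shapes.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo = "TAUR-dev/testing__pvv2_lora"
cfg = AutoConfig.from_pretrained(repo)
print(cfg.hidden_size // cfg.num_attention_heads)  # head dim: 1536 / 12 = 128

model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16)
print(sum(p.numel() for p in model.parameters()))  # ~1.54B parameters (embeddings tied)
```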
generation_config.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.57.1"
14
+ }
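generation_config.json carries the sampling defaults (temperature 0.7, top-p 0.8, top-k 20, repetition penalty 1.1, sampling enabled). `generate` picks these up automatically, but they can also be loaded and overridden explicitly, as in this sketch:

```python
# Sketch: use the shipped sampling defaults, overriding only the output length.
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

repo = "TAUR-dev/testing__pvv2_lora"
tok = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo)

gen_cfg = GenerationConfig.from_pretrained(repo)  # temperature=0.7, top_p=0.8, top_k=20, ...
inputs = tok("Hello, who are you?", return_tensors="pt")
out = model.generate(**inputs, generation_config=gen_cfg, max_new_tokens=64)
print(tok.decode(out[0], skip_special_tokens=True))
```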
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667e91af8fd2f29b75aae0c0d91510f0fb65cd6ca6ae30755afad87f358c287
3
+ size 3087467144
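This entry is a Git LFS pointer, not the weights themselves: the ~3.1 GB `model.safetensors` blob is stored in LFS and identified by its SHA-256. A small sketch for verifying a downloaded copy against the pointer's oid:

```python
# Sketch: confirm a downloaded model.safetensors matches the LFS pointer's sha256.
import hashlib

EXPECTED = "4667e91af8fd2f29b75aae0c0d91510f0fb65cd6ca6ae30755afad87f358c287"

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == EXPECTED, "mismatch: corrupt download or a bare LFS pointer file"
```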
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "padding_side": "left",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
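tokenizer_config.json sets left-side padding, `<|endoftext|>` as the pad token, and a 131072-token `model_max_length`, which is the usual setup for batched decoder-only generation. A short sketch of how those settings surface when tokenizing a batch:

```python
# Sketch: batched tokenization picks up padding_side="left" and the pad token automatically.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("TAUR-dev/testing__pvv2_lora")
print(tok.padding_side, tok.pad_token, tok.model_max_length)  # left <|endoftext|> 131072

batch = tok(["short prompt", "a somewhat longer prompt"], padding=True, return_tensors="pt")
print(batch["input_ids"].shape)  # shorter sequences are padded on the left
```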
training_artifacts/README.md ADDED
@@ -0,0 +1,16 @@
1
+ # Training Artifacts
2
+
3
+ This directory contains the training configuration and logs for this model.
4
+
5
+ ## Contents
6
+
7
+ - **hydra_config.yaml**: Complete Hydra configuration used for training
8
+ - **train_config.yaml**: LlamaFactory training configuration
9
+ - **merge_config.yaml**: LlamaFactory merge/export configuration
10
+ - **logs/**: Training logs from the job (cleaned for text format)
11
+
12
+ ## Job Information
13
+
14
+ - Job Name: testing__pvv2_lora
15
+ - Timestamp: 2025-10-25 03:56:22 UTC
16
+ - Execution Mode: Local
training_artifacts/hydra_config.yaml ADDED
@@ -0,0 +1,134 @@
1
+ _target_: null
2
+ job:
3
+ name: testing__pvv2_lora
4
+ mode: local
5
+ dry_run: false
6
+ work_dir: null
7
+ slurm:
8
+ time_limit: null
9
+ constraint: null
10
+ memory: null
11
+ cpus_per_task: 16
12
+ partition: null
13
+ mail_user: null
14
+ execution:
15
+ nodes: 1
16
+ gpus_per_node: 2
17
+ num_gpus: null
18
+ hostfile: null
19
+ secrets_file: ./secrets.env
20
+ model:
21
+ name_or_path: Qwen/Qwen2.5-1.5B-Instruct
22
+ finetuning_type: lora
23
+ dataset:
24
+ name: TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full
25
+ dir: null
26
+ info_json: null
27
+ template: qwen
28
+ cutoff_len: 16192
29
+ val_size: 0.0
30
+ tokenized_path: /scratch/zrs2020/.cache/llamafactory/tokenized/TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full
31
+ hf_hub_url: TAUR-dev/D-SFT_C-ours_cd3arg_10responses_reflections10_formats-C_full
32
+ formatting: sharegpt
33
+ ranking: false
34
+ subset: null
35
+ split: train
36
+ folder: null
37
+ num_samples: null
38
+ columns:
39
+ prompt: null
40
+ query: null
41
+ response: null
42
+ history: null
43
+ messages: conversations
44
+ system: null
45
+ tools: null
46
+ images: null
47
+ videos: null
48
+ audios: null
49
+ chosen: null
50
+ rejected: null
51
+ kto_tag: null
52
+ tags:
53
+ role: role
54
+ content: content
55
+ user: user
56
+ assistant: assistant
57
+ observation: null
58
+ function: null
59
+ system: null
60
+ output:
61
+ experiment_dir: ./experiments
62
+ wandb:
63
+ project: null
64
+ run_name: testing__pvv2_lora
65
+ entity: null
66
+ hf:
67
+ repo_id: TAUR-dev/testing__pvv2_lora
68
+ private: false
69
+ upload_artifacts: true
70
+ cleanup:
71
+ checkpoints: false
72
+ merged: false
73
+ training:
74
+ stage: sft
75
+ do_train: true
76
+ max_samples: 100000
77
+ do_eval: false
78
+ save_strategy: steps
79
+ save_steps: 5
80
+ logging_steps: 10
81
+ fp16: false
82
+ bf16: true
83
+ adam_beta1: 0.9
84
+ adam_beta2: 0.95
85
+ overwrite_output_dir: true
86
+ per_device_train_batch_size: 1
87
+ gradient_accumulation_steps: 1
88
+ gradient_checkpointing: true
89
+ learning_rate: 1.0e-06
90
+ lr_scheduler_type: cosine
91
+ num_train_epochs: 2
92
+ warmup_ratio: 0.05
93
+ weight_decay: 0.0001
94
+ template: qwen
95
+ max_steps: 10
96
+ preprocessing_num_workers: 16
97
+ overwrite_cache: true
98
+ finetuning:
99
+ training:
100
+ stage: sft
101
+ do_train: true
102
+ finetuning_type: lora
103
+ lora_rank: 8
104
+ lora_alpha: 16
105
+ lora_dropout: 0.05
106
+ lora_target: all
107
+ overwrite_cache: true
108
+ preprocessing_num_workers: 16
109
+ dataloader_num_workers: 4
110
+ logging_steps: 10
111
+ save_steps: 500
112
+ plot_loss: true
113
+ overwrite_output_dir: true
114
+ save_only_model: false
115
+ report_to: none
116
+ per_device_train_batch_size: 1
117
+ gradient_accumulation_steps: 8
118
+ learning_rate: 0.0001
119
+ num_train_epochs: 3.0
120
+ lr_scheduler_type: cosine
121
+ warmup_ratio: 0.1
122
+ bf16: true
123
+ ddp_timeout: 180000000
124
+ resume_from_checkpoint: null
125
+ val_size: 0.1
126
+ per_device_eval_batch_size: 1
127
+ eval_strategy: steps
128
+ eval_steps: 500
129
+ do_eval: true
130
+ merge:
131
+ export_dir: null
132
+ export_size: 5
133
+ export_device: cpu
134
+ export_legacy_format: false
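The `finetuning` block requests a rank-8 LoRA (alpha 16, dropout 0.05) on all linear projections, which matches the modules LlamaFactory reports in the log below (q/k/v/o plus the MLP projections). For reference, an equivalent adapter written directly with `peft` would look roughly like the following; this is an illustrative sketch, not the configuration LlamaFactory generates internally.

```python
# Hedged sketch: a peft adapter equivalent to lora_rank=8, lora_alpha=16,
# lora_dropout=0.05, lora_target=all on Qwen2.5-1.5B-Instruct.
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
lora_cfg = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(base, lora_cfg)
model.print_trainable_parameters()  # ~9.2M trainable of ~1.55B total, as in the training log
```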
training_artifacts/logs/pipeline_cleaned.txt ADDED
@@ -0,0 +1,708 @@
1
+ [2025-10-24 23:55:28] ========================================
2
+ [2025-10-24 23:55:28] Job Name: testing__pvv2_lora
3
+ [2025-10-24 23:55:28] Hostname: gl007.hpc.nyu.edu
4
+ [2025-10-24 23:55:28] Number of nodes: 1
5
+ [2025-10-24 23:55:28] GPUs per node: 2
6
+ [2025-10-24 23:55:28] Start Time: Fri Oct 24 11:55:28 PM EDT 2025
7
+ [2025-10-24 23:55:28] Log file: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/logs/pipeline.log
8
+ [2025-10-24 23:55:28] ========================================
9
+ [2025-10-24 23:55:28] Sourcing secrets from: /scratch/zrs2020/LlamaFactoryHelper/secrets.env
10
+ [2025-10-24 23:55:30]
11
+ [2025-10-24 23:55:30] ========================================
12
+ [2025-10-24 23:55:30] Configuration Paths
13
+ [2025-10-24 23:55:30] ========================================
14
+ [2025-10-24 23:55:30] Train Config: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/configs/train_config.yaml
15
+ [2025-10-24 23:55:30] Merge Config: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/configs/merge_config.yaml
16
+ [2025-10-24 23:55:30] Dataset Info: /scratch/zrs2020/LlamaFactoryHelper/LLaMA-Factory/data/dataset_info.json
17
+ [2025-10-24 23:55:30] Output Dir: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints
18
+ [2025-10-24 23:55:30] Export Dir: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged
19
+ [2025-10-24 23:55:30] HF Repo ID: TAUR-dev/testing__pvv2_lora
20
+ [2025-10-24 23:55:30]
21
+ [make-effective-cfg] tokenized_path: /scratch/zrs2020/.cache/hf_cache/home/llamafactory/tokenized/TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full_fb94f2a3
22
+ [make-effective-cfg] wrote: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/logs/train_config.effective.yaml
23
+ [2025-10-24 23:55:30]
24
+ [2025-10-24 23:55:30] ========================================
25
+ [2025-10-24 23:55:30] STAGE 0: Downloading Dataset
26
+ [2025-10-24 23:55:30] Dataset: TAUR-dev/D-SFT_C-ours_cd3arg_10responses_reflections10_formats-C_full
27
+ [2025-10-24 23:55:30] Start Time: Fri Oct 24 11:55:30 PM EDT 2025
28
+ [2025-10-24 23:55:30] ========================================
29
+ [dataset-download] Loading dataset from: TAUR-dev/D-SFT_C-ours_cd3arg_10responses_reflections10_formats-C_full
30
+ [dataset-download] Dataset loaded successfully
31
+ [dataset-download] Dataset info: DatasetDict({
32
+ train: Dataset({
33
+ features: ['conversations', 'sft_template_type_idx'],
34
+ num_rows: 29130
35
+ })
36
+ })
37
+ [2025-10-24 23:55:32]
38
+ [2025-10-24 23:55:32] ========================================
39
+ [2025-10-24 23:55:32] Dataset download completed
40
+ [2025-10-24 23:55:32] End Time: Fri Oct 24 11:55:32 PM EDT 2025
41
+ [2025-10-24 23:55:32] ========================================
42
+ [2025-10-24 23:55:32]
43
+ [2025-10-24 23:55:32] ========================================
44
+ [2025-10-24 23:55:32] STAGE 1: Training Model
45
+ [2025-10-24 23:55:32] Start Time: Fri Oct 24 11:55:32 PM EDT 2025
46
+ [2025-10-24 23:55:32] ========================================
47
+ [2025-10-24 23:55:32] Job: testing__pvv2_lora
48
+ [2025-10-24 23:55:32] Nodes: 1 | GPUs/node: 2
49
+ [2025-10-24 23:55:32] Master: 127.0.0.1:29500
50
+ [2025-10-24 23:55:32] LLaMA-Factory: /scratch/zrs2020/LlamaFactoryHelper/LLaMA-Factory
51
+ [2025-10-24 23:55:32] Train cfg (effective): /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/logs/train_config.effective.yaml
52
+ [2025-10-24 23:55:32] HF cache: /scratch/zrs2020/.cache/hf_cache/home/datasets
53
+ [2025-10-24 23:55:32] Launcher: torchrun
54
+ [2025-10-24 23:55:32]
55
+ [2025-10-24 23:55:32] Single-node training (2 GPU(s))
56
+ [2025-10-24 23:55:32] Executing command: llamafactory-cli train /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/logs/train_config.effective.yaml
57
+ /scratch/zrs2020/miniconda/miniconda3/envs/llamafactory/lib/python3.12/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
58
+ warnings.warn(
59
+ [INFO|2025-10-24 23:55:40] llamafactory.launcher:143 >> Initializing 2 distributed tasks at: 127.0.0.1:29500
60
+ W1024 23:55:41.864000 3022854 site-packages/torch/distributed/run.py:803]
61
+ W1024 23:55:41.864000 3022854 site-packages/torch/distributed/run.py:803] *****************************************
62
+ W1024 23:55:41.864000 3022854 site-packages/torch/distributed/run.py:803] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
63
+ W1024 23:55:41.864000 3022854 site-packages/torch/distributed/run.py:803] *****************************************
64
+ /scratch/zrs2020/miniconda/miniconda3/envs/llamafactory/lib/python3.12/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
65
+ warnings.warn(
66
+ /scratch/zrs2020/miniconda/miniconda3/envs/llamafactory/lib/python3.12/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
67
+ warnings.warn(
68
+ /scratch/zrs2020/miniconda/miniconda3/envs/llamafactory/lib/python3.12/site-packages/jieba/_compat.py:18: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
69
+ import pkg_resources
70
+ /scratch/zrs2020/miniconda/miniconda3/envs/llamafactory/lib/python3.12/site-packages/jieba/_compat.py:18: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
71
+ import pkg_resources
72
+ [W1024 23:55:50.757874363 ProcessGroupNCCL.cpp:924] Warning: TORCH_NCCL_AVOID_RECORD_STREAMS is the default now, this environment variable is thus deprecated. (function operator())
73
+ [W1024 23:55:50.757887679 ProcessGroupNCCL.cpp:924] Warning: TORCH_NCCL_AVOID_RECORD_STREAMS is the default now, this environment variable is thus deprecated. (function operator())
74
+ [INFO|2025-10-24 23:55:50] llamafactory.hparams.parser:143 >> Set `ddp_find_unused_parameters` to False in DDP training since LoRA is enabled.
75
+ [INFO|2025-10-24 23:55:50] llamafactory.hparams.parser:423 >> Process rank: 0, world size: 2, device: cuda:0, distributed training: True, compute dtype: torch.bfloat16
76
+ [INFO|2025-10-24 23:55:50] llamafactory.hparams.parser:423 >> Process rank: 1, world size: 2, device: cuda:1, distributed training: True, compute dtype: torch.bfloat16
77
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,441 >> loading file vocab.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/vocab.json
78
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,442 >> loading file merges.txt from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/merges.txt
79
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,442 >> loading file tokenizer.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/tokenizer.json
80
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,442 >> loading file added_tokens.json from cache at None
81
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,442 >> loading file special_tokens_map.json from cache at None
82
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,442 >> loading file tokenizer_config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/tokenizer_config.json
83
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,442 >> loading file chat_template.jinja from cache at None
84
+ [INFO|tokenization_utils_base.py:2364] 2025-10-24 23:55:50,609 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
85
+ [INFO|configuration_utils.py:765] 2025-10-24 23:55:50,826 >> loading configuration file config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/config.json
86
+ [INFO|configuration_utils.py:839] 2025-10-24 23:55:50,828 >> Model config Qwen2Config {
87
+ "architectures": [
88
+ "Qwen2ForCausalLM"
89
+ ],
90
+ "attention_dropout": 0.0,
91
+ "bos_token_id": 151643,
92
+ "dtype": "bfloat16",
93
+ "eos_token_id": 151645,
94
+ "hidden_act": "silu",
95
+ "hidden_size": 1536,
96
+ "initializer_range": 0.02,
97
+ "intermediate_size": 8960,
98
+ "layer_types": [
99
+ "full_attention",
100
+ "full_attention",
101
+ "full_attention",
102
+ "full_attention",
103
+ "full_attention",
104
+ "full_attention",
105
+ "full_attention",
106
+ "full_attention",
107
+ "full_attention",
108
+ "full_attention",
109
+ "full_attention",
110
+ "full_attention",
111
+ "full_attention",
112
+ "full_attention",
113
+ "full_attention",
114
+ "full_attention",
115
+ "full_attention",
116
+ "full_attention",
117
+ "full_attention",
118
+ "full_attention",
119
+ "full_attention",
120
+ "full_attention",
121
+ "full_attention",
122
+ "full_attention",
123
+ "full_attention",
124
+ "full_attention",
125
+ "full_attention",
126
+ "full_attention"
127
+ ],
128
+ "max_position_embeddings": 32768,
129
+ "max_window_layers": 21,
130
+ "model_type": "qwen2",
131
+ "num_attention_heads": 12,
132
+ "num_hidden_layers": 28,
133
+ "num_key_value_heads": 2,
134
+ "rms_norm_eps": 1e-06,
135
+ "rope_scaling": null,
136
+ "rope_theta": 1000000.0,
137
+ "sliding_window": null,
138
+ "tie_word_embeddings": true,
139
+ "transformers_version": "4.57.1",
140
+ "use_cache": true,
141
+ "use_sliding_window": false,
142
+ "vocab_size": 151936
143
+ }
144
+
145
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,899 >> loading file vocab.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/vocab.json
146
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,899 >> loading file merges.txt from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/merges.txt
147
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,899 >> loading file tokenizer.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/tokenizer.json
148
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,899 >> loading file added_tokens.json from cache at None
149
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,899 >> loading file special_tokens_map.json from cache at None
150
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,899 >> loading file tokenizer_config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/tokenizer_config.json
151
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:55:50,899 >> loading file chat_template.jinja from cache at None
152
+ [INFO|tokenization_utils_base.py:2364] 2025-10-24 23:55:51,063 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
153
+ [WARNING|2025-10-24 23:55:51] llamafactory.data.loader:148 >> Loading dataset from disk will ignore other data arguments.
154
+ [INFO|2025-10-24 23:55:51] llamafactory.data.loader:143 >> Loaded tokenized dataset from /scratch/zrs2020/.cache/hf_cache/home/llamafactory/tokenized/TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full_fb94f2a3.
155
+ [INFO|configuration_utils.py:765] 2025-10-24 23:55:51,138 >> loading configuration file config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/config.json
156
+ [INFO|configuration_utils.py:839] 2025-10-24 23:55:51,138 >> Model config Qwen2Config {
157
+ "architectures": [
158
+ "Qwen2ForCausalLM"
159
+ ],
160
+ "attention_dropout": 0.0,
161
+ "bos_token_id": 151643,
162
+ "dtype": "bfloat16",
163
+ "eos_token_id": 151645,
164
+ "hidden_act": "silu",
165
+ "hidden_size": 1536,
166
+ "initializer_range": 0.02,
167
+ "intermediate_size": 8960,
168
+ "layer_types": [
169
+ "full_attention",
170
+ "full_attention",
171
+ "full_attention",
172
+ "full_attention",
173
+ "full_attention",
174
+ "full_attention",
175
+ "full_attention",
176
+ "full_attention",
177
+ "full_attention",
178
+ "full_attention",
179
+ "full_attention",
180
+ "full_attention",
181
+ "full_attention",
182
+ "full_attention",
183
+ "full_attention",
184
+ "full_attention",
185
+ "full_attention",
186
+ "full_attention",
187
+ "full_attention",
188
+ "full_attention",
189
+ "full_attention",
190
+ "full_attention",
191
+ "full_attention",
192
+ "full_attention",
193
+ "full_attention",
194
+ "full_attention",
195
+ "full_attention",
196
+ "full_attention"
197
+ ],
198
+ "max_position_embeddings": 32768,
199
+ "max_window_layers": 21,
200
+ "model_type": "qwen2",
201
+ "num_attention_heads": 12,
202
+ "num_hidden_layers": 28,
203
+ "num_key_value_heads": 2,
204
+ "rms_norm_eps": 1e-06,
205
+ "rope_scaling": null,
206
+ "rope_theta": 1000000.0,
207
+ "sliding_window": null,
208
+ "tie_word_embeddings": true,
209
+ "transformers_version": "4.57.1",
210
+ "use_cache": true,
211
+ "use_sliding_window": false,
212
+ "vocab_size": 151936
213
+ }
214
+
215
+ [INFO|2025-10-24 23:55:51] llamafactory.model.model_utils.kv_cache:143 >> KV cache is disabled during training.
216
+ [WARNING|logging.py:328] 2025-10-24 23:55:51,492 >> `torch_dtype` is deprecated! Use `dtype` instead!
217
+ [INFO|modeling_utils.py:1172] 2025-10-24 23:55:51,493 >> loading weights file model.safetensors from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/model.safetensors
218
+ [INFO|modeling_utils.py:2341] 2025-10-24 23:55:51,494 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.
219
+ [INFO|configuration_utils.py:986] 2025-10-24 23:55:51,495 >> Generate config GenerationConfig {
220
+ "bos_token_id": 151643,
221
+ "eos_token_id": 151645,
222
+ "use_cache": false
223
+ }
224
+
225
+ `torch_dtype` is deprecated! Use `dtype` instead!
226
+ [INFO|configuration_utils.py:941] 2025-10-24 23:55:52,421 >> loading configuration file generation_config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/generation_config.json
227
+ [INFO|configuration_utils.py:986] 2025-10-24 23:55:52,421 >> Generate config GenerationConfig {
228
+ "bos_token_id": 151643,
229
+ "do_sample": true,
230
+ "eos_token_id": [
231
+ 151645,
232
+ 151643
233
+ ],
234
+ "pad_token_id": 151643,
235
+ "repetition_penalty": 1.1,
236
+ "temperature": 0.7,
237
+ "top_k": 20,
238
+ "top_p": 0.8
239
+ }
240
+
241
+ [INFO|dynamic_module_utils.py:423] 2025-10-24 23:55:52,453 >> Could not locate the custom_generate/generate.py inside Qwen/Qwen2.5-1.5B-Instruct.
242
+ [INFO|2025-10-24 23:55:52] llamafactory.model.model_utils.checkpointing:143 >> Gradient checkpointing enabled.
243
+ [INFO|2025-10-24 23:55:52] llamafactory.model.model_utils.attention:143 >> Using torch SDPA for faster training and inference.
244
+ [INFO|2025-10-24 23:55:52] llamafactory.model.adapter:143 >> Upcasting trainable params to float32.
245
+ [INFO|2025-10-24 23:55:52] llamafactory.model.adapter:143 >> Fine-tuning method: LoRA
246
+ [INFO|2025-10-24 23:55:52] llamafactory.model.model_utils.misc:143 >> Found linear modules: o_proj,gate_proj,q_proj,down_proj,v_proj,k_proj,up_proj
247
+ [INFO|2025-10-24 23:55:52] llamafactory.model.loader:143 >> trainable params: 9,232,384 || all params: 1,552,946,688 || trainable%: 0.5945
248
+ [WARNING|trainer.py:906] 2025-10-24 23:55:52,738 >> The model is already on multiple devices. Skipping the move to device specified in `args`.
249
+ [INFO|trainer.py:699] 2025-10-24 23:55:52,740 >> max_steps is given, it will override any value given in num_train_epochs
250
+ [INFO|trainer.py:749] 2025-10-24 23:55:52,740 >> Using auto half precision backend
251
+ [WARNING|trainer.py:982] 2025-10-24 23:55:52,742 >> The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
252
+ The model is already on multiple devices. Skipping the move to device specified in `args`.
253
+ The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
254
+ NCCL version 2.27.5+cuda12.9
255
+ [INFO|trainer.py:2519] 2025-10-24 23:55:53,120 >> ***** Running training *****
256
+ [INFO|trainer.py:2520] 2025-10-24 23:55:53,120 >> Num examples = 29,130
257
+ [INFO|trainer.py:2521] 2025-10-24 23:55:53,120 >> Num Epochs = 1
258
+ [INFO|trainer.py:2522] 2025-10-24 23:55:53,120 >> Instantaneous batch size per device = 1
259
+ [INFO|trainer.py:2525] 2025-10-24 23:55:53,120 >> Total train batch size (w. parallel, distributed & accumulation) = 2
260
+ [INFO|trainer.py:2526] 2025-10-24 23:55:53,120 >> Gradient Accumulation steps = 1
261
+ [INFO|trainer.py:2527] 2025-10-24 23:55:53,120 >> Total optimization steps = 10
262
+ [INFO|trainer.py:2528] 2025-10-24 23:55:53,122 >> Number of trainable parameters = 9,232,384
263
+ [INFO|integration_utils.py:867] 2025-10-24 23:55:53,220 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
264
+ wandb: Currently logged in as: zsprague (ut_nlp_deduce) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin
265
+ wandb: Tracking run with wandb version 0.22.2
266
+ wandb: Run data is saved locally in /scratch/zrs2020/LlamaFactoryHelper/wandb/run-20251024_235553-oqx8ngeo
267
+ wandb: Run `wandb offline` to turn off syncing.
268
+ wandb: Syncing run testing__pvv2_lora
269
+ wandb: View project at https://wandb.ai/ut_nlp_deduce/llamafactory
270
+ wandb: View run at https://wandb.ai/ut_nlp_deduce/llamafactory/runs/oqx8ngeo
271
+ 0%| | 0/10 [00:00<?, ?it/s] 10%| | 1/10 [00:01<00:10, 1.22s/it] 20%| | 2/10 [00:01<00:06, 1.27it/s] 30%| | 3/10 [00:02<00:04, 1.68it/s] 40%| | 4/10 [00:03<00:04, 1.37it/s] 50%| | 5/10 [00:03<00:03, 1.49it/s][INFO|trainer.py:4309] 2025-10-24 23:55:57,737 >> Saving model checkpoint to /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-5
272
+ [INFO|configuration_utils.py:765] 2025-10-24 23:55:57,839 >> loading configuration file config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/config.json
273
+ [INFO|configuration_utils.py:839] 2025-10-24 23:55:57,840 >> Model config Qwen2Config {
274
+ "architectures": [
275
+ "Qwen2ForCausalLM"
276
+ ],
277
+ "attention_dropout": 0.0,
278
+ "bos_token_id": 151643,
279
+ "dtype": "bfloat16",
280
+ "eos_token_id": 151645,
281
+ "hidden_act": "silu",
282
+ "hidden_size": 1536,
283
+ "initializer_range": 0.02,
284
+ "intermediate_size": 8960,
285
+ "layer_types": [
286
+ "full_attention",
287
+ "full_attention",
288
+ "full_attention",
289
+ "full_attention",
290
+ "full_attention",
291
+ "full_attention",
292
+ "full_attention",
293
+ "full_attention",
294
+ "full_attention",
295
+ "full_attention",
296
+ "full_attention",
297
+ "full_attention",
298
+ "full_attention",
299
+ "full_attention",
300
+ "full_attention",
301
+ "full_attention",
302
+ "full_attention",
303
+ "full_attention",
304
+ "full_attention",
305
+ "full_attention",
306
+ "full_attention",
307
+ "full_attention",
308
+ "full_attention",
309
+ "full_attention",
310
+ "full_attention",
311
+ "full_attention",
312
+ "full_attention",
313
+ "full_attention"
314
+ ],
315
+ "max_position_embeddings": 32768,
316
+ "max_window_layers": 21,
317
+ "model_type": "qwen2",
318
+ "num_attention_heads": 12,
319
+ "num_hidden_layers": 28,
320
+ "num_key_value_heads": 2,
321
+ "rms_norm_eps": 1e-06,
322
+ "rope_scaling": null,
323
+ "rope_theta": 1000000.0,
324
+ "sliding_window": null,
325
+ "tie_word_embeddings": true,
326
+ "transformers_version": "4.57.1",
327
+ "use_cache": true,
328
+ "use_sliding_window": false,
329
+ "vocab_size": 151936
330
+ }
331
+
332
+ [INFO|tokenization_utils_base.py:2421] 2025-10-24 23:55:58,067 >> chat template saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-5/chat_template.jinja
333
+ [INFO|tokenization_utils_base.py:2590] 2025-10-24 23:55:58,072 >> tokenizer config file saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-5/tokenizer_config.json
334
+ [INFO|tokenization_utils_base.py:2599] 2025-10-24 23:55:58,076 >> Special tokens file saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-5/special_tokens_map.json
335
+ 60%| | 6/10 [00:05<00:04, 1.18s/it] 70%| | 7/10 [00:06<00:02, 1.11it/s] 80%| | 8/10 [00:06<00:01, 1.23it/s] 90%| | 9/10 [00:07<00:00, 1.45it/s]100%|| 10/10 [00:08<00:00, 1.23it/s] {'loss': 0.7188, 'grad_norm': 0.2177160233259201, 'learning_rate': 3.015368960704584e-08, 'epoch': 0.0}
336
+ 100%|| 10/10 [00:08<00:00, 1.23it/s][INFO|trainer.py:4309] 2025-10-24 23:56:02,371 >> Saving model checkpoint to /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10
337
+ [INFO|configuration_utils.py:765] 2025-10-24 23:56:02,490 >> loading configuration file config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/config.json
338
+ [INFO|configuration_utils.py:839] 2025-10-24 23:56:02,491 >> Model config Qwen2Config {
339
+ "architectures": [
340
+ "Qwen2ForCausalLM"
341
+ ],
342
+ "attention_dropout": 0.0,
343
+ "bos_token_id": 151643,
344
+ "dtype": "bfloat16",
345
+ "eos_token_id": 151645,
346
+ "hidden_act": "silu",
347
+ "hidden_size": 1536,
348
+ "initializer_range": 0.02,
349
+ "intermediate_size": 8960,
350
+ "layer_types": [
351
+ "full_attention",
352
+ "full_attention",
353
+ "full_attention",
354
+ "full_attention",
355
+ "full_attention",
356
+ "full_attention",
357
+ "full_attention",
358
+ "full_attention",
359
+ "full_attention",
360
+ "full_attention",
361
+ "full_attention",
362
+ "full_attention",
363
+ "full_attention",
364
+ "full_attention",
365
+ "full_attention",
366
+ "full_attention",
367
+ "full_attention",
368
+ "full_attention",
369
+ "full_attention",
370
+ "full_attention",
371
+ "full_attention",
372
+ "full_attention",
373
+ "full_attention",
374
+ "full_attention",
375
+ "full_attention",
376
+ "full_attention",
377
+ "full_attention",
378
+ "full_attention"
379
+ ],
380
+ "max_position_embeddings": 32768,
381
+ "max_window_layers": 21,
382
+ "model_type": "qwen2",
383
+ "num_attention_heads": 12,
384
+ "num_hidden_layers": 28,
385
+ "num_key_value_heads": 2,
386
+ "rms_norm_eps": 1e-06,
387
+ "rope_scaling": null,
388
+ "rope_theta": 1000000.0,
389
+ "sliding_window": null,
390
+ "tie_word_embeddings": true,
391
+ "transformers_version": "4.57.1",
392
+ "use_cache": true,
393
+ "use_sliding_window": false,
394
+ "vocab_size": 151936
395
+ }
396
+
397
+ [INFO|tokenization_utils_base.py:2421] 2025-10-24 23:56:02,701 >> chat template saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10/chat_template.jinja
398
+ [INFO|tokenization_utils_base.py:2590] 2025-10-24 23:56:02,706 >> tokenizer config file saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10/tokenizer_config.json
399
+ [INFO|tokenization_utils_base.py:2599] 2025-10-24 23:56:02,710 >> Special tokens file saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10/special_tokens_map.json
400
+ [INFO|trainer.py:2810] 2025-10-24 23:56:03,258 >>
401
+
402
+ Training completed. Do not forget to share your model on huggingface.co/models =)
403
+
404
+
405
+ {'train_runtime': 10.137, 'train_samples_per_second': 1.973, 'train_steps_per_second': 0.986, 'train_loss': 0.718793535232544, 'epoch': 0.0}
406
+ 100%|| 10/10 [00:09<00:00, 1.23it/s]100%|| 10/10 [00:09<00:00, 1.10it/s]
407
+ [INFO|trainer.py:4309] 2025-10-24 23:56:03,267 >> Saving model checkpoint to /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints
408
+ [INFO|configuration_utils.py:765] 2025-10-24 23:56:03,356 >> loading configuration file config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/config.json
409
+ [INFO|configuration_utils.py:839] 2025-10-24 23:56:03,357 >> Model config Qwen2Config {
410
+ "architectures": [
411
+ "Qwen2ForCausalLM"
412
+ ],
413
+ "attention_dropout": 0.0,
414
+ "bos_token_id": 151643,
415
+ "dtype": "bfloat16",
416
+ "eos_token_id": 151645,
417
+ "hidden_act": "silu",
418
+ "hidden_size": 1536,
419
+ "initializer_range": 0.02,
420
+ "intermediate_size": 8960,
421
+ "layer_types": [
422
+ "full_attention",
423
+ "full_attention",
424
+ "full_attention",
425
+ "full_attention",
426
+ "full_attention",
427
+ "full_attention",
428
+ "full_attention",
429
+ "full_attention",
430
+ "full_attention",
431
+ "full_attention",
432
+ "full_attention",
433
+ "full_attention",
434
+ "full_attention",
435
+ "full_attention",
436
+ "full_attention",
437
+ "full_attention",
438
+ "full_attention",
439
+ "full_attention",
440
+ "full_attention",
441
+ "full_attention",
442
+ "full_attention",
443
+ "full_attention",
444
+ "full_attention",
445
+ "full_attention",
446
+ "full_attention",
447
+ "full_attention",
448
+ "full_attention",
449
+ "full_attention"
450
+ ],
451
+ "max_position_embeddings": 32768,
452
+ "max_window_layers": 21,
453
+ "model_type": "qwen2",
454
+ "num_attention_heads": 12,
455
+ "num_hidden_layers": 28,
456
+ "num_key_value_heads": 2,
457
+ "rms_norm_eps": 1e-06,
458
+ "rope_scaling": null,
459
+ "rope_theta": 1000000.0,
460
+ "sliding_window": null,
461
+ "tie_word_embeddings": true,
462
+ "transformers_version": "4.57.1",
463
+ "use_cache": true,
464
+ "use_sliding_window": false,
465
+ "vocab_size": 151936
466
+ }
467
+
468
+ [INFO|tokenization_utils_base.py:2421] 2025-10-24 23:56:03,588 >> chat template saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/chat_template.jinja
469
+ [INFO|tokenization_utils_base.py:2590] 2025-10-24 23:56:03,592 >> tokenizer config file saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/tokenizer_config.json
470
+ [INFO|tokenization_utils_base.py:2599] 2025-10-24 23:56:03,596 >> Special tokens file saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/special_tokens_map.json
471
+ ***** train metrics *****
472
+ epoch = 0.0007
473
+ total_flos = 414519GF
474
+ train_loss = 0.7188
475
+ train_runtime = 0:00:10.13
476
+ train_samples_per_second = 1.973
477
+ train_steps_per_second = 0.986
478
+ [INFO|modelcard.py:456] 2025-10-24 23:56:03,838 >> Dropping the following result as it does not have all the necessary fields:
479
+ {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
480
+ [W1024 23:56:04.029787829 AllocatorConfig.cpp:28] Warning: PYTORCH_CUDA_ALLOC_CONF is deprecated, use PYTORCH_ALLOC_CONF instead (function operator())
481
+ wandb:
482
+ wandb: View run testing__pvv2_lora at: https://wandb.ai/ut_nlp_deduce/llamafactory/runs/oqx8ngeo
483
+ wandb: Find logs at: wandb/run-20251024_235553-oqx8ngeo/logs
484
+ [W1024 23:56:05.730735839 AllocatorConfig.cpp:28] Warning: PYTORCH_CUDA_ALLOC_CONF is deprecated, use PYTORCH_ALLOC_CONF instead (function operator())
485
+ [W1024 23:56:05.132682733 AllocatorConfig.cpp:28] Warning: PYTORCH_CUDA_ALLOC_CONF is deprecated, use PYTORCH_ALLOC_CONF instead (function operator())
486
+ [W1024 23:56:05.555229777 AllocatorConfig.cpp:28] Warning: PYTORCH_CUDA_ALLOC_CONF is deprecated, use PYTORCH_ALLOC_CONF instead (function operator())
487
+ [2025-10-24 23:56:06]
488
+ [2025-10-24 23:56:06] ========================================
489
+ [2025-10-24 23:56:06] Training completed successfully
490
+ [2025-10-24 23:56:06] End Time: Fri Oct 24 11:56:06 PM EDT 2025
491
+ [2025-10-24 23:56:06] ========================================
492
+ [2025-10-24 23:56:06]
493
+ [2025-10-24 23:56:06] ========================================
494
+ [2025-10-24 23:56:06] STAGE 2: Merging/Exporting Model
495
+ [2025-10-24 23:56:06] Start Time: Fri Oct 24 11:56:06 PM EDT 2025
496
+ [2025-10-24 23:56:06] ========================================
497
+ [2025-10-24 23:56:06] Looking for checkpoints in: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints
498
+ [2025-10-24 23:56:06] Analyzing checkpoints to find the one from current training run...
499
+ [2025-10-24 23:56:06] - checkpoint-10: trainer_state.json modified at Fri Oct 24 11:56:03 PM EDT 2025
500
+ [2025-10-24 23:56:06] - checkpoint-5: trainer_state.json modified at Fri Oct 24 11:55:58 PM EDT 2025
501
+ [2025-10-24 23:56:06]
502
+ [2025-10-24 23:56:06] Selected checkpoint: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10
503
+ [2025-10-24 23:56:06] This checkpoint has the most recently updated trainer_state.json
504
+ [2025-10-24 23:56:06] Checkpoint details:
505
+ [2025-10-24 23:56:06] Path: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10
506
+ [2025-10-24 23:56:06] Last modified: 2025-10-24 23:56:03.255712120 -0400
507
+ [2025-10-24 23:56:06] Training step: 10
508
+ [2025-10-24 23:56:06] Updating merge config to point to checkpoint...
509
+ Successfully updated merge config
510
+ [2025-10-24 23:56:06] Updated merge config to use: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10
511
+ [2025-10-24 23:56:06]
512
+ [2025-10-24 23:56:06] Merge config contents:
513
+ [2025-10-24 23:56:06] template: qwen
514
+ [2025-10-24 23:56:06] trust_remote_code: true
515
+ [2025-10-24 23:56:06] export_dir: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged
516
+ [2025-10-24 23:56:06] model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct
517
+ [2025-10-24 23:56:06] adapter_name_or_path: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10
518
+ [2025-10-24 23:56:06]
519
+ [2025-10-24 23:56:06] Executing command: llamafactory-cli export /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/configs/merge_config.yaml
520
+ /scratch/zrs2020/miniconda/miniconda3/envs/llamafactory/lib/python3.12/site-packages/transformers/utils/hub.py:110: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
521
+ warnings.warn(
522
+ /scratch/zrs2020/miniconda/miniconda3/envs/llamafactory/lib/python3.12/site-packages/jieba/_compat.py:18: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
523
+ import pkg_resources
524
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:13,985 >> loading file vocab.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/vocab.json
525
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:13,986 >> loading file merges.txt from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/merges.txt
526
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:13,986 >> loading file tokenizer.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/tokenizer.json
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:13,986 >> loading file added_tokens.json from cache at None
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:13,986 >> loading file special_tokens_map.json from cache at None
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:13,986 >> loading file tokenizer_config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/tokenizer_config.json
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:13,986 >> loading file chat_template.jinja from cache at None
+ [INFO|tokenization_utils_base.py:2364] 2025-10-24 23:56:14,157 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+ [INFO|configuration_utils.py:765] 2025-10-24 23:56:14,372 >> loading configuration file config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/config.json
+ [INFO|configuration_utils.py:839] 2025-10-24 23:56:14,374 >> Model config Qwen2Config {
+ "architectures": [
+ "Qwen2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "dtype": "bfloat16",
+ "eos_token_id": 151645,
+ "hidden_act": "silu",
+ "hidden_size": 1536,
+ "initializer_range": 0.02,
+ "intermediate_size": 8960,
+ "layer_types": [
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 32768,
+ "max_window_layers": 21,
+ "model_type": "qwen2",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 2,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000.0,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "transformers_version": "4.57.1",
+ "use_cache": true,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+ }
+
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:14,443 >> loading file vocab.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/vocab.json
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:14,443 >> loading file merges.txt from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/merges.txt
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:14,443 >> loading file tokenizer.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/tokenizer.json
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:14,443 >> loading file added_tokens.json from cache at None
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:14,443 >> loading file special_tokens_map.json from cache at None
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:14,443 >> loading file tokenizer_config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/tokenizer_config.json
+ [INFO|tokenization_utils_base.py:2095] 2025-10-24 23:56:14,443 >> loading file chat_template.jinja from cache at None
+ [INFO|tokenization_utils_base.py:2364] 2025-10-24 23:56:14,608 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+ [INFO|configuration_utils.py:765] 2025-10-24 23:56:14,663 >> loading configuration file config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/config.json
+ [INFO|configuration_utils.py:839] 2025-10-24 23:56:14,663 >> Model config Qwen2Config {
+ "architectures": [
+ "Qwen2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "dtype": "bfloat16",
+ "eos_token_id": 151645,
+ "hidden_act": "silu",
+ "hidden_size": 1536,
+ "initializer_range": 0.02,
+ "intermediate_size": 8960,
+ "layer_types": [
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 32768,
+ "max_window_layers": 21,
+ "model_type": "qwen2",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 2,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000.0,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "transformers_version": "4.57.1",
+ "use_cache": true,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+ }
+
+ [WARNING|logging.py:328] 2025-10-24 23:56:14,663 >> `torch_dtype` is deprecated! Use `dtype` instead!
+ [INFO|2025-10-24 23:56:14] llamafactory.model.model_utils.kv_cache:143 >> KV cache is enabled for faster generation.
+ [WARNING|logging.py:328] 2025-10-24 23:56:15,013 >> `torch_dtype` is deprecated! Use `dtype` instead!
+ [INFO|modeling_utils.py:1172] 2025-10-24 23:56:15,014 >> loading weights file model.safetensors from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/model.safetensors
+ [INFO|modeling_utils.py:2341] 2025-10-24 23:56:15,015 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.
+ [INFO|configuration_utils.py:986] 2025-10-24 23:56:15,016 >> Generate config GenerationConfig {
+ "bos_token_id": 151643,
+ "eos_token_id": 151645
+ }
+
+ [INFO|configuration_utils.py:941] 2025-10-24 23:56:15,118 >> loading configuration file generation_config.json from cache at /scratch/zrs2020/.cache/hf_cache/home/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306/generation_config.json
+ [INFO|configuration_utils.py:986] 2025-10-24 23:56:15,119 >> Generate config GenerationConfig {
+ "bos_token_id": 151643,
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151643
+ ],
+ "pad_token_id": 151643,
+ "repetition_penalty": 1.1,
+ "temperature": 0.7,
+ "top_k": 20,
+ "top_p": 0.8
+ }
+
+ [INFO|dynamic_module_utils.py:423] 2025-10-24 23:56:15,148 >> Could not locate the custom_generate/generate.py inside Qwen/Qwen2.5-1.5B-Instruct.
+ [INFO|2025-10-24 23:56:15] llamafactory.model.model_utils.attention:143 >> Using torch SDPA for faster training and inference.
+ [INFO|2025-10-24 23:56:17] llamafactory.model.adapter:143 >> Merged 1 adapter(s).
+ [INFO|2025-10-24 23:56:17] llamafactory.model.adapter:143 >> Loaded adapter(s): /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10
+ [INFO|2025-10-24 23:56:17] llamafactory.model.loader:143 >> all params: 1,543,714,304
+ [INFO|2025-10-24 23:56:17] llamafactory.train.tuner:143 >> Convert model dtype to: torch.bfloat16.
+ [INFO|configuration_utils.py:491] 2025-10-24 23:56:17,909 >> Configuration saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged/config.json
+ [INFO|configuration_utils.py:757] 2025-10-24 23:56:17,914 >> Configuration saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged/generation_config.json
+ [INFO|modeling_utils.py:4181] 2025-10-24 23:56:21,705 >> Model weights saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged/model.safetensors
+ [INFO|tokenization_utils_base.py:2421] 2025-10-24 23:56:21,725 >> chat template saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged/chat_template.jinja
+ [INFO|tokenization_utils_base.py:2590] 2025-10-24 23:56:21,745 >> tokenizer config file saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged/tokenizer_config.json
+ [INFO|tokenization_utils_base.py:2599] 2025-10-24 23:56:21,765 >> Special tokens file saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged/special_tokens_map.json
+ [INFO|2025-10-24 23:56:21] llamafactory.train.tuner:143 >> Ollama modelfile saved in /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged/Modelfile
+ [2025-10-24 23:56:22]
+ [2025-10-24 23:56:22] ========================================
+ [2025-10-24 23:56:22] Merge/Export completed successfully
+ [2025-10-24 23:56:22] End Time: Fri Oct 24 11:56:22 PM EDT 2025
+ [2025-10-24 23:56:22] ========================================
+ [2025-10-24 23:56:22]
+ [2025-10-24 23:56:22] ========================================
+ [2025-10-24 23:56:22] Preparing Training Artifacts
+ [2025-10-24 23:56:22] ========================================
+ [2025-10-24 23:56:22] Copying configuration files...
+ [2025-10-24 23:56:22] Copying and cleaning training logs...
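The merge log above ends with the LoRA adapter folded into Qwen/Qwen2.5-1.5B-Instruct and the result exported to the merged/ directory. Below is a minimal smoke-test sketch, assuming transformers and torch are installed and the merged/ export is reachable at the path shown: the directory and the sampling values (temperature 0.7, top_p 0.8, top_k 20, repetition_penalty 1.1) are copied from the log, while the prompt, token budget, and device handling are illustrative, not part of this upload.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Export directory written by the merge step in the log above.
merged_dir = "/scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged"
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(merged_dir)
# Newer transformers accepts dtype=; older releases use torch_dtype= instead.
model = AutoModelForCausalLM.from_pretrained(merged_dir, dtype=torch.bfloat16).to(device)

# Build a ChatML-style prompt through the saved chat template.
messages = [{"role": "user", "content": "Say hello in one sentence."}]  # hypothetical test prompt
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(device)

# Sampling settings mirror the GenerationConfig printed in the log.
output_ids = model.generate(
    input_ids,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    repetition_penalty=1.1,
)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))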
training_artifacts/merge_config.yaml ADDED
@@ -0,0 +1,5 @@
+ template: qwen
+ trust_remote_code: true
+ export_dir: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged
+ model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct
+ adapter_name_or_path: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10
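This export config drove the merge step logged above: it names the base model, the adapter checkpoint, the Qwen chat template, and the output directory (LLaMA-Factory consumes a file like this through its export command, e.g. llamafactory-cli export merge_config.yaml). To make the merge step concrete, here is a rough hand-rolled equivalent using peft; it is only a sketch under the assumption that transformers and peft are installed, not the code LLaMA-Factory actually runs.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Values taken from merge_config.yaml above.
base_id = "Qwen/Qwen2.5-1.5B-Instruct"
adapter_dir = "/scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10"
export_dir = "/scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/merged"

# Load the base model, attach the LoRA adapter, and fold its deltas into the weights.
base = AutoModelForCausalLM.from_pretrained(base_id, dtype=torch.bfloat16, trust_remote_code=True)
model = PeftModel.from_pretrained(base, adapter_dir)
merged = model.merge_and_unload()

# Write the standalone merged checkpoint plus the tokenizer files next to it.
merged.save_pretrained(export_dir)
AutoTokenizer.from_pretrained(base_id, trust_remote_code=True).save_pretrained(export_dir)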
training_artifacts/train_config.yaml ADDED
@@ -0,0 +1,32 @@
+ stage: sft
+ do_train: true
+ max_samples: 100000
+ do_eval: false
+ save_strategy: steps
+ save_steps: 5
+ logging_steps: 10
+ fp16: false
+ bf16: true
+ adam_beta1: 0.9
+ adam_beta2: 0.95
+ overwrite_output_dir: true
+ per_device_train_batch_size: 1
+ gradient_accumulation_steps: 1
+ gradient_checkpointing: true
+ learning_rate: 1.0e-06
+ lr_scheduler_type: cosine
+ num_train_epochs: 2
+ warmup_ratio: 0.05
+ weight_decay: 0.0001
+ template: qwen
+ max_steps: 10
+ preprocessing_num_workers: 16
+ overwrite_cache: true
+ model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct
+ finetuning_type: lora
+ trust_remote_code: true
+ dataset: TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full
+ dataset_dir: /scratch/zrs2020/LlamaFactoryHelper/LLaMA-Factory/data
+ cutoff_len: 16192
+ tokenized_path: /scratch/zrs2020/.cache/llamafactory/tokenized/TAUR_dev_D_SFT_C_ours_cd3arg_10responses_reflections10_formats_C_full
+ output_dir: /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints
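A few things this training config implies: with max_steps: 10 and save_steps: 5, checkpoints are written at steps 5 and 10, and checkpoint-10 under output_dir is exactly the adapter path the merge config above points at; the effective batch size is 1 sequence per optimizer step per device (per_device_train_batch_size * gradient_accumulation_steps). A small sketch, assuming PyYAML is installed and the file is read at its repo-relative path, that loads the config and reconstructs those values:

import os
import yaml  # PyYAML, assumed available

# Repo-relative path of the file shown above.
with open("training_artifacts/train_config.yaml") as f:
    cfg = yaml.safe_load(f)

effective_batch = cfg["per_device_train_batch_size"] * cfg["gradient_accumulation_steps"]
last_ckpt = os.path.join(cfg["output_dir"], f"checkpoint-{cfg['max_steps']}")

print(f"effective per-device batch size: {effective_batch}")
print(f"final checkpoint: {last_ckpt}")
# -> /scratch/zrs2020/LlamaFactoryHelper/experiments/testing__pvv2_lora/checkpoints/checkpoint-10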
vocab.json ADDED
The diff for this file is too large to render. See raw diff