- sections:
  - local: index
    title: TRL
  - local: installation
    title: Installation
  - local: quickstart
    title: Quickstart
  title: Getting started
- sections:
  - local: dataset_formats
    title: Dataset Formats
  - local: how_to_train
    title: Training FAQ
  - local: logging
    title: Understanding Logs
  title: Conceptual Guides
- sections:
  - local: clis
    title: Command Line Interface (CLI)
  - local: customization
    title: Customizing the Training
  - local: reducing_memory_usage
    title: Reducing Memory Usage
  - local: speeding_up_training
    title: Speeding Up Training
  - local: distributing_training
    title: Distributing Training
  - local: use_model
    title: Using Trained Models
  title: How-to guides
- sections:
  - local: deepspeed_integration
    title: DeepSpeed
  - local: liger_kernel_integration
    title: Liger Kernel
  - local: peft_integration
    title: PEFT
  - local: unsloth_integration
    title: Unsloth
  - local: vllm_integration
    title: vLLM
  title: Integrations
- sections:
  - local: example_overview
    title: Example Overview
  - local: community_tutorials
    title: Community Tutorials
  - local: sentiment_tuning
    title: Sentiment Tuning
  - local: using_llama_models
    title: Training StackLlama
  - local: detoxifying_a_lm
    title: Detoxifying a Language Model
  - local: multi_adapter_rl
    title: Multi Adapter RLHF
  - local: training_vlm_sft
    title: Fine-tuning a Multimodal Model Using SFT (Single or Multi-Image Dataset)
  title: Examples
- sections:
  - sections: # Sorted alphabetically
    - local: alignprop_trainer
      title: AlignProp
    - local: bco_trainer
      title: BCO
    - local: cpo_trainer
      title: CPO
    - local: ddpo_trainer
      title: DDPO
    - local: dpo_trainer
      title: DPO
    - local: online_dpo_trainer
      title: Online DPO
    - local: gkd_trainer
      title: GKD
    - local: grpo_trainer
      title: GRPO
    - local: kto_trainer
      title: KTO
    - local: nash_md_trainer
      title: Nash-MD
    - local: orpo_trainer
      title: ORPO
    - local: ppo_trainer
      title: PPO
    - local: prm_trainer
      title: PRM
    - local: reward_trainer
      title: Reward
    - local: rloo_trainer
      title: RLOO
    - local: sft_trainer
      title: SFT
    - local: iterative_sft_trainer
      title: Iterative SFT
    - local: xpo_trainer
      title: XPO
    title: Trainers
  - local: models
    title: Model Classes
  - local: model_utils
    title: Model Utilities
  - local: best_of_n
    title: Best of N Sampling
  - local: judges
    title: Judges
  - local: callbacks
    title: Callbacks
  - local: data_utils
    title: Data Utilities
  - local: rewards
    title: Reward Functions
  - local: script_utils
    title: Script Utilities
  - local: others
    title: Others
  title: API