Add configs and stuff (#2)

Esse commit está contido em:
lewtun
2025-01-24 20:05:18 +01:00
commit de GitHub
commit 6acc9a0aa0
13 arquivos alterados com 287 adições e 182 exclusões
+5 -5
Ver Arquivo
@@ -3,7 +3,7 @@
# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
export PYTHONPATH = src
check_dirs := src tests scripts
check_dirs := src scripts
style:
black --line-length 119 --target-version py310 $(check_dirs) setup.py
@@ -18,16 +18,16 @@ quality:
# Release stuff
pre-release:
python src/alignment/release.py
python src/open_r1/release.py
pre-patch:
python src/alignment/release.py --patch
python src/open_r1/release.py --patch
post-release:
python src/alignment/release.py --post_release
python src/open_r1/release.py --post_release
post-patch:
python src/alignment/release.py --post_release --patch
python src/open_r1/release.py --post_release --patch
wheels:
python setup.py bdist_wheel && python setup.py sdist
+43 -1
Ver Arquivo
@@ -1 +1,43 @@
# open_r1
# Open R1
## Installation instructions
To run the code in this project, first, create a Python virtual environment using e.g. Conda:
```shell
conda create -n openr1 python=3.11 && conda activate openr1
```
Next, install vLLM:
```shell
pip install vllm==0.6.6.post1
# For HF (cluster only has CUDA 12.1)
pip install vllm==0.6.6.post1 --extra-index-url https://download.pytorch.org/whl/cu121
```
This will also install PyTorch `v2.5.1` and it is **very important** to use this version since the vLLM binaries are compiled for it. You can then install the remaining dependencies for your specific use case via `pip install -e .[LIST OF MODES]`. For most contributors, we recommend:
```shell
pip install -e ".[dev]"
```
Next, log into your Hugging Face and Weights and Biases accounts as follows:
```shell
huggingface-cli login
wandb login
```
Finally, check your system has Git LFS installed so that you can load and push models/datasets to the Hugging Face Hub:
```shell
git-lfs --version
```
If it isn't installed, run:
```shell
sudo apt-get install git-lfs
```
@@ -0,0 +1,45 @@
# Model arguments
model_name_or_path: Qwen/Qwen2.5-1.5B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_mixer:
HuggingFaceH4/Bespoke-Stratos-17k: 1.0
dataset_splits:
- train
- test
preprocessing_num_workers: 12
# SFT trainer config
bf16: true
do_eval: true
eval_strategy: epoch
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
use_reentrant: False
hub_model_id: HuggingFaceH4/Qwen2.5-1.5B-R1-v00.00
hub_strategy: every_save
learning_rate: 2.0e-05
log_level: info
logging_steps: 5
logging_strategy: steps
lr_scheduler_type: cosine
max_seq_length: 2048
max_steps: -1
num_train_epochs: 1
output_dir: data/Qwen2.5-1.5B-Distill-R1-v00.00
overwrite_output_dir: true
per_device_eval_batch_size: 8
per_device_train_batch_size: 16
push_to_hub: true
remove_unused_columns: true
report_to:
- wandb
save_strategy: "steps"
save_steps: 100
save_total_limit: 1
seed: 42
warmup_ratio: 0.1
@@ -0,0 +1,22 @@
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
deepspeed_multinode_launcher: standard
offload_optimizer_device: none
offload_param_device: none
zero3_init_flag: true
zero3_save_16bit_model: true
zero_stage: 3
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
+26
Ver Arquivo
@@ -0,0 +1,26 @@
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: FSDP
downcast_bf16: 'no'
enable_cpu_affinity: false
fsdp_config:
fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
fsdp_backward_prefetch: BACKWARD_PRE
fsdp_cpu_ram_efficient_loading: true
fsdp_forward_prefetch: true
fsdp_offload_params: false
fsdp_sharding_strategy: FULL_SHARD
fsdp_state_dict_type: SHARDED_STATE_DICT
fsdp_sync_module_states: true
fsdp_use_orig_params: true
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
+25
Ver Arquivo
@@ -0,0 +1,25 @@
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: FSDP
downcast_bf16: 'no'
fsdp_config:
fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
fsdp_backward_prefetch: BACKWARD_PRE
fsdp_cpu_ram_efficient_loading: true
fsdp_forward_prefetch: false
fsdp_offload_params: true
fsdp_sharding_strategy: FULL_SHARD
fsdp_state_dict_type: SHARDED_STATE_DICT
fsdp_sync_module_states: true
fsdp_use_orig_params: false
machine_rank: 0
main_training_function: main
mixed_precision: 'no'
num_machines: 1
num_processes: 2
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
+16
Ver Arquivo
@@ -0,0 +1,16 @@
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: MULTI_GPU
downcast_bf16: 'no'
gpu_ids: all
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false
+86
Ver Arquivo
@@ -0,0 +1,86 @@
#!/bin/bash
#SBATCH --ntasks-per-node=1
#SBATCH --exclusive
#SBATCH --gres=gpu:8
#SBATCH --partition=hopper-prod # Adjust this for your cluster
#SBATCH --output=/fsx/h4/logs/%x-%j.out # Adjust this for your cluster
#SBATCH --err=/fsx/h4/logs/%x-%j.err # Adjust this for your cluster
set -x -e
source ~/.bashrc
conda activate openr1
echo "START TIME: $(date)"
MODEL=$1
TASK=$2
PRECISION=$3
ACCELERATOR=$4
OPTIONAL_ARGS=$5
# Training setup
NUM_NODES=$SLURM_NNODES
GPUS_PER_NODE=8
WORLD_SIZE=$(($NUM_NODES*$GPUS_PER_NODE))
# Due to conflicts between Accelerate's DeepSpeed configs and Transformers' TrainingArguments, we need to parse the gradient accumulation steps from the config file to ensure they match
CONFIG_FILE=recipes/$MODEL/$TASK/config_$PRECISION.yaml
GRAD_ACC_STEPS=$(grep 'gradient_accumulation_steps' $CONFIG_FILE | awk '{print $2}')
# Split the string into individual arguments
IFS=' ' read -ra ARGS <<< "$OPTIONAL_ARGS"
# Loop through the arguments and find the one with "--gradient_accumulation_steps"
for arg in "${ARGS[@]}"; do
if [[ "$arg" == "--gradient_accumulation_steps="* ]]; then
# Extract the value after the equals sign
GRAD_ACC_STEPS="${arg#*=}"
break # Exit the loop once we find the desired argument
fi
done
echo "Gradient accumulation steps: $GRAD_ACC_STEPS"
# so processes know who to talk to
MASTER_ADDR=$(scontrol show hostnames $SLURM_JOB_NODELIST | head -n 1)
MASTER_PORT=6000
export CMD=" \
scripts/run_$TASK.py $CONFIG_FILE $OPTIONAL_ARGS
"
export LAUNCHER="HF_HUB_ENABLE_HF_TRANSFER=1 ACCELERATE_LOG_LEVEL=info TRANSFORMERS_VERBOSITY=info accelerate launch \
--config_file recipes/accelerate_configs/$ACCELERATOR.yaml \
--gradient_accumulation_steps $GRAD_ACC_STEPS \
--num_machines $NUM_NODES \
--num_processes $WORLD_SIZE \
--main_process_ip $MASTER_ADDR \
--main_process_port $MASTER_PORT \
--machine_rank \$SLURM_PROCID \
--rdzv_conf "rdzv_backend=c10d,rdzv_endpoint=$MASTER_ADDR:$MASTER_PORT" \
--max_restarts 1 \
--role \$(hostname -s): \
--tee 3 \
"
# force crashing on nccl issues like hanging broadcast
export NCCL_ASYNC_ERROR_HANDLING=1
# export NCCL_DEBUG=INFO
# export NCCL_DEBUG_SUBSYS=COLL
# export NCCL_SOCKET_NTHREADS=1
# export NCCL_NSOCKS_PERTHREAD=1
# export CUDA_LAUNCH_BLOCKING=1
# Specific configuration optimized for the Hugging Face Compute Cluster
# Be ye warned this may not work on other clusters!
module load cuda/12.1
# srun error handling:
# --wait=60: wait 60 sec after the first task terminates before terminating all remaining tasks
# --kill-on-bad-exit=1: terminate a step if any task exits with a non-zero exit code
SRUN_ARGS=" \
--wait=60 \
--kill-on-bad-exit=1 \
"
clear; srun $SRUN_ARGS --jobid $SLURM_JOB_ID bash -c "$LAUNCHER --role \$SLURMD_NODENAME: $CMD" 2>&1
echo "END TIME: $(date)"
-140
Ver Arquivo
@@ -1,141 +1 @@
# Scripts to Train and Evaluate Chat Models
## Fine-tuning
In the handbook, we provide three main ways to align LLMs for chat:
- Full fine-tuning on a multi-GPU machine with DeepSpeed ZeRO-3 (tested on an 8 x A100 (80GB) node).
- LoRA or QLoRA fine-tuning on a single consumer 24GB GPU (tested on an RTX 4090).
- LoRA fine-tuning on a multi-GPU machine with DeepSpeed ZeRO-3 (tested on a 2 x A100s (80GB)).
- QLoRA fine-tuning on multi-GPU machine with FSDP (tested on a 2 x A6000s (48GB)).
In practice, we find comparable performance for both full and QLoRA fine-tuning, with the latter having the advantage of producing small adapter weights that are fast to upload and download from the Hugging Face Hub. Here are the general commands to fine-tune your models:
```shell
# Full training with ZeRO-3 on 8 GPUs
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_{task}.py recipes/{model_name}/{task}/config_full.yaml
# QLoRA 4-bit training on a single GPU
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/multi_gpu.yaml --num_processes=1 scripts/run_{task}.py recipes/{model_name}/{task}/config_qlora.yaml
# LoRA training on a single GPU
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/multi_gpu.yaml --num_processes=1 scripts/run_{task}.py recipes/{model_name}/{task}/config_qlora.yaml --load_in_4bit=false
# LoRA training with ZeRO-3 on two or more GPUs
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml --num_processes={num_gpus} scripts/run_{task}.py recipes/{model_name}/{task}/config_qlora.yaml --load_in_4bit=false
# QLoRA training with FSDP on two or more GPUs
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/fsdp+qlora.yaml --num_processes={num_gpus} scripts/run_{task}.py recipes/{model_name}/{task}/config_qlora.yaml --torch_dtype=bfloat16 --bnb_4bit_quant_storage=bfloat16
```
Here `{task}` refers to the type of training you wish to run. Currently, the following tasks are supported:
* continued pretraining `cpt` (note that `cpt` is only present in the `gpt-nl` example recipe)
* supervised finetuning `sft`
* direct preference optimisation `dpo`
* odds ratio preference optimisation `orpo`
`{model_name}` refers to the choice of a recipe in the `recipes` directory. For example, to replicate Zephyr-7B-β you can run:
```shell
# Step 1 - train SFT policy
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_sft.py recipes/zephyr-7b-beta/sft/config_full.yaml
# Step 2 - align with DPO
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_dpo.py recipes/zephyr-7b-beta/dpo/config_full.yaml
```
**💡 Tip:** If you scale up/down the number of GPUs, we recommend also scaling up the per-device batch size or number of gradient accumulation steps to keep the global batch size constant (and thus replicate our results).
By default, these scripts will push each model to your Hugging Face Hub username, i.e. `{username}/{model_name}-{task}`. You can override the parameters in each YAML config by appending them to the command as follows:
```shell
# Change batch size, number of epochs etc
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_{task}.py recipes/{model_name}/{task}/config_full.yaml --per_device_train_batch_size=42 --num_train_epochs=5
```
## Logging with Weights and Biases
By default, all training metrics are logged with TensorBoard. If you have a [Weights and Biases](https://wandb.ai/site) account and are logged in, you can view the training metrics by appending `--report_to=wandb`, e.g.
```shell
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_{task}.py recipes/{model_name}/{task}/config_full.yaml --report_to=wandb
```
## Launching jobs on a Slurm cluster
If you have access to a Slurm cluster, we provide a `recipes/launch.slurm` script that will automatically queue training jobs for you. Here's how you can use it:
```shell
sbatch --job-name=handbook_{task} --nodes=1 recipes/launch.slurm {model_name} {task} {precision} {accelerator}
```
Here `{model_name}` and `{task}` are defined as above, while `{precision}` refers to the type of training (`full` vs `qlora`) and `{accelerator}` refers to the choice of 🤗 Accelerate config in `recipes/accelerate_configs`. If you wish to override the default config parameters, you can provide them by appending a space-separated string like `'--arg1=value1 --arg2=value2'. Here's a concrete example to run SFT on 1 node of 8 GPUs:
```shell
# Launch on Slurm and override default hyperparameters
sbatch --job-name=handbook_sft --nodes=1 recipes/launch.slurm zephyr-7b-beta sft full deepspeed_zero3 '--per_device_train_batch_size=42 --num_train_epochs=5'
```
You can scale the number of nodes by increasing the `--nodes` flag.
**⚠️ Note:** the configuration in `recipes/launch.slurm` is optimised for the Hugging Face Compute Cluster and may require tweaking to be adapted to your own compute nodes.
## Fine-tuning on your datasets
Under the hood, each training script uses the `get_datasets()` function which allows one to easily combine multiple datasets with varying proportions. For instance, this is how one can specify multiple datasets and which splits to combine in one of the YAML configs:
```yaml
datasets_mixer:
dataset_1: 0.5 # Use 50% of the training examples
dataset_2: 0.66 # Use 66% of the training examples
dataset_3: 0.10 # Use 10% of the training examples
dataset_splits:
- train_xxx # The training splits to mix
- test_xxx # The test splits to mix
```
If you want to fine-tune on your datasets, the main thing to keep in mind is how the chat templates are applied to the dataset blend. Since each task (SFT, DPO, ORPO, etc), requires a different format, we assume the datasets have the following columns:
**SFT**
* `messages`: A list of `dicts` in the form `{"role": "{role}", "content": {content}}`.
* See [ultrachat_200k](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k) for an example.
**DPO and ORPO**
* `chosen`: A list of `dicts` in the form `{"role": "{role}", "content": {content}}` corresponding to the preferred dialogue.
* `rejected`: A list of `dicts` in the form `{"role": "{role}", "content": {content}}` corresponding to the dispreferred dialogue.
* See [ultrafeedback_binarized](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) for an example.
We also find it useful to include dedicated splits per task in our datasets, so e.g. we have:
* `{train,test}_sft`: Splits for SFT training.
* `{train,test}_gen`: Splits for generation ranking like rejection sampling or PPO.
* `{train,test}_prefs`: Splits for preference modelling, like reward modelling or DPO.
If you format your dataset in the same way, our training scripts should work out of the box!
## Evaluating chat models
We recommend benchmarking chat models on:
* [MT-Bench](https://huggingface.co/spaces/lmsys/mt-bench): a multi-turn benchmark spanning 80 dialogues and 10 domains.
* [AlpacaEval](https://github.com/tatsu-lab/alpaca_eval): a single-turn benchmark that evaluates the helpfulness of chat and instruct models against `text-davinci-003`.
For both benchmarks, we have added support for the [Zephyr chat template](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full/blob/ac6e600eefcce74f5e8bae1035d4f66019e93190/tokenizer_config.json#L30) (which is the default produced by our scripts), so you can evaluate models produced by our scripts as follows:
**MT-Bench**
* Follow the installation instructions [here](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge)
* Make sure the word `zephyr` exists in the `--model-path` argument when generating the model responses [here](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge#step-1-generate-model-answers-to-mt-bench-questions). This will ensure the correct chat template is loaded. For example, the following model name is valid: `--model-path {hub_username}/my-baby-zephyr`
* Generate the model responses and GPT-4 rankings.
**AlpacaEval**
* Follow the installation instructions [here](https://github.com/tatsu-lab/alpaca_eval#quick-start)
* Copy-paste the [config](https://github.com/tatsu-lab/alpaca_eval/blob/main/src/alpaca_eval/models_configs/zephyr-7b-beta/configs.yaml) for `zephyr-7b-beta` and place it in the `model_configs` directory under `{your_zephyr_model}`.
* Next, update the [config name](https://github.com/tatsu-lab/alpaca_eval/blob/2daa6e11b194653043ca74f735728dc068e04aae/src/alpaca_eval/models_configs/zephyr-7b-beta/configs.yaml#L1) and [Hub model ID](https://github.com/tatsu-lab/alpaca_eval/blob/2daa6e11b194653043ca74f735728dc068e04aae/src/alpaca_eval/models_configs/zephyr-7b-beta/configs.yaml#L5) to match your model name.
* Follow the steps to evaluate your model [here](https://github.com/tatsu-lab/alpaca_eval/tree/main#evaluating-a-model).
Note that MT-Bench and AlpacaEval rely on LLMs like GPT-4 to judge the quality of the model responses, and thus the ranking exhibits various biases including a preference for models distilled from GPTs. For that reason, we also recommend submitting your best models for human evaluation in:
* [Chatbot Arena](https://chat.lmsys.org): a live, human evaluation of chat models in head-to-head comparisons.
+2 -2
Ver Arquivo
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -204,7 +204,7 @@ def main():
"finetuned_from": model_args.model_name_or_path,
"dataset": list(data_args.dataset_mixer.keys()),
"dataset_tags": list(data_args.dataset_mixer.keys()),
"tags": ["alignment-handbook"],
"tags": ["open-r1"],
}
if trainer.accelerator.is_main_process:
trainer.create_model_card(**kwargs)
+1 -1
Ver Arquivo
@@ -3,7 +3,7 @@ default_section = FIRSTPARTY
ensure_newline_before_comments = True
force_grid_wrap = 0
include_trailing_comma = True
known_first_party = alignment
known_first_party = open_r1
known_third_party =
transformers
datasets
+15 -32
Ver Arquivo
@@ -41,34 +41,27 @@ if stale_egg_info.exists():
# IMPORTANT: all dependencies should be listed here with their version requirements, if any.
# * If a dependency is fast-moving (e.g. transformers), pin to the exact version
_deps = [
"accelerate>=0.29.2",
"accelerate>=1.2.1",
"bitsandbytes>=0.43.0",
"black>=24.4.2",
"datasets>=2.18.0",
"deepspeed>=0.14.4",
"einops>=0.6.1",
"evaluate==0.4.0",
"datasets>=3.2.0",
"deepspeed==0.15.4",
"einops>=0.8.0",
"flake8>=6.0.0",
"hf-doc-builder>=0.4.0",
"hf_transfer>=0.1.4",
"huggingface-hub>=0.19.2,<1.0",
"huggingface-hub[cli]>=0.19.2,<1.0",
"isort>=5.12.0",
"ninja>=1.11.1",
"numpy>=1.24.2",
"lighteval @ git+https://github.com/huggingface/lighteval.git@4f381b352c0e467b5870a97d41cb66b487a2c503#egg=lighteval[math]",
"packaging>=23.0",
"parameterized>=0.9.0",
"peft>=0.9.0",
"protobuf<=3.20.2", # Needed to avoid conflicts with `transformers`
"pytest",
"safetensors>=0.3.3",
"sentencepiece>=0.1.99",
"scipy",
"tensorboard",
"torch>=2.1.2",
"transformers>=4.39.3",
"trl @ git+https://github.com/huggingface/trl.git",
"jinja2>=3.0.0",
"tqdm>=4.64.1",
"torch>=2.5.1",
"transformers==4.48.1",
"trl @ git+https://github.com/huggingface/trl.git@6f99f42f724123409422f2fad42bf56fa91f366f", # Bump when this is merged: https://github.com/huggingface/trl/pull/2650
"vllm==0.6.6.post1",
"wandb>=0.19.1",
]
# this is a lookup table with items like:
@@ -88,45 +81,35 @@ extras = {}
extras["tests"] = deps_list("pytest", "parameterized")
extras["torch"] = deps_list("torch")
extras["quality"] = deps_list("black", "isort", "flake8")
extras["docs"] = deps_list("hf-doc-builder")
extras["dev"] = extras["docs"] + extras["quality"] + extras["tests"]
extras["dev"] = extras["quality"] + extras["tests"]
# core dependencies shared across the whole project - keep this to a bare minimum :)
install_requires = [
deps["accelerate"],
deps["bitsandbytes"],
deps["einops"],
deps["evaluate"],
deps["datasets"],
deps["deepspeed"],
deps["hf_transfer"],
deps["huggingface-hub"],
deps["jinja2"],
deps["ninja"],
deps["numpy"],
deps["packaging"], # utilities from PyPA to e.g., compare versions
deps["peft"],
deps["protobuf"],
deps["safetensors"],
deps["sentencepiece"],
deps["scipy"],
deps["tensorboard"],
deps["tqdm"], # progress bars in model download and training scripts
deps["transformers"],
deps["trl"],
]
setup(
name="open_r1",
name="open-r1",
version="0.1.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
author="The Hugging Face team (past and future)",
author_email="lewis@huggingface.co",
description="Open R1",
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",
keywords="nlp deep learning rlhf llm",
keywords="llm inference-time compute reasoning",
license="Apache",
url="https://github.com/huggingface/open_r1",
url="https://github.com/huggingface/open-r1",
package_dir={"": "src"},
packages=find_packages("src"),
zip_safe=False,
+1 -1
Ver Arquivo
@@ -38,7 +38,7 @@ REPLACE_PATTERNS = {
README_FILE = "README.md"
REPLACE_FILES = {
"init": "src/alignment/__init__.py",
"init": "src/open_r1/__init__.py",
"setup": "setup.py",
"citation": "CITATION.cff",
"readme": README_FILE,