Spaces:
				
			
			
	
			
			
		Build error
		
	
	
	
			
			
	
	
	
	
		
		
		Build error
		
	complete gpt-4o-mini training
Browse files- datasets/mac/openai-training.jsonl +0 -0
- llm_toolkit/eval_openai.py +16 -10
- llm_toolkit/translation_utils.py +85 -7
- logs/{l40-1gpu-rpp.txt → l40-1gpu-rpp-1.txt} +0 -0
- logs/l40-4gpu-1.txt +0 -0
- logs/l40-4gpu.txt +0 -3
- logs/openai-gpt-4o-mini-fine-tuned.txt +151 -0
- logs/openai-training-sample.jsonl +3 -0
- notebooks/00b_Data Analysis_Few_Shots.ipynb +2 -2
- notebooks/00c_Data Analysis_Fine_Tuned.ipynb +0 -0
- notebooks/00d_Data Analysis_Fine_Tuned_RPP.ipynb +0 -0
- notebooks/02_Fine_Tune_OpenAI.ipynb +0 -0
- requirements.txt +1 -0
- results/mac-results_few_shots_metrics.csv +2 -2
- results/mac-results_few_shots_openai.csv +2 -2
- results/mac-results_fine_tuned_metrics.csv +2 -2
- scripts/eval-4gpu.sh +2 -4
    	
        datasets/mac/openai-training.jsonl
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        llm_toolkit/eval_openai.py
    CHANGED
    
    | @@ -29,7 +29,7 @@ print( | |
| 29 | 
             
            )
         | 
| 30 |  | 
| 31 |  | 
| 32 | 
            -
            def on_num_shots_step_completed(model_name, dataset, predictions):
         | 
| 33 | 
             
                save_results(
         | 
| 34 | 
             
                    model_name,
         | 
| 35 | 
             
                    results_path,
         | 
| @@ -44,8 +44,10 @@ def on_num_shots_step_completed(model_name, dataset, predictions): | |
| 44 | 
             
            def evaluate_model_with_num_shots(
         | 
| 45 | 
             
                model_name,
         | 
| 46 | 
             
                data_path,
         | 
|  | |
| 47 | 
             
                range_num_shots=[0, 1, 3, 5, 10, 50],
         | 
| 48 | 
             
                max_new_tokens=2048,
         | 
|  | |
| 49 | 
             
            ):
         | 
| 50 | 
             
                print(f"Evaluating model: {model_name}")
         | 
| 51 |  | 
| @@ -56,20 +58,24 @@ def evaluate_model_with_num_shots( | |
| 56 | 
             
                    print(f"*** Evaluating with num_shots: {num_shots}")
         | 
| 57 |  | 
| 58 | 
             
                    predictions = eval_openai(num_shots, datasets, max_new_tokens=max_new_tokens)
         | 
| 59 | 
            -
                    model_name_with_shorts =  | 
|  | |
|  | |
|  | |
|  | |
| 60 |  | 
| 61 | 
             
                    try:
         | 
| 62 | 
             
                        on_num_shots_step_completed(
         | 
| 63 | 
            -
                            model_name_with_shorts,
         | 
| 64 | 
            -
                            datasets["test"],
         | 
| 65 | 
            -
                            predictions,
         | 
| 66 | 
             
                        )
         | 
| 67 | 
             
                    except Exception as e:
         | 
| 68 | 
             
                        print(e)
         | 
| 69 |  | 
| 70 |  | 
| 71 | 
            -
             | 
| 72 | 
            -
                 | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 75 | 
            -
             | 
|  | |
|  | 
|  | |
| 29 | 
             
            )
         | 
| 30 |  | 
| 31 |  | 
| 32 | 
            +
            def on_num_shots_step_completed(model_name, dataset, predictions, results_path):
         | 
| 33 | 
             
                save_results(
         | 
| 34 | 
             
                    model_name,
         | 
| 35 | 
             
                    results_path,
         | 
|  | |
| 44 | 
             
            def evaluate_model_with_num_shots(
         | 
| 45 | 
             
                model_name,
         | 
| 46 | 
             
                data_path,
         | 
| 47 | 
            +
                results_path=None,
         | 
| 48 | 
             
                range_num_shots=[0, 1, 3, 5, 10, 50],
         | 
| 49 | 
             
                max_new_tokens=2048,
         | 
| 50 | 
            +
                result_column_name=None,
         | 
| 51 | 
             
            ):
         | 
| 52 | 
             
                print(f"Evaluating model: {model_name}")
         | 
| 53 |  | 
|  | |
| 58 | 
             
                    print(f"*** Evaluating with num_shots: {num_shots}")
         | 
| 59 |  | 
| 60 | 
             
                    predictions = eval_openai(num_shots, datasets, max_new_tokens=max_new_tokens)
         | 
| 61 | 
            +
                    model_name_with_shorts = (
         | 
| 62 | 
            +
                        result_column_name
         | 
| 63 | 
            +
                        if result_column_name
         | 
| 64 | 
            +
                        else f"{model_name}/shots-{num_shots:02d}"
         | 
| 65 | 
            +
                    )
         | 
| 66 |  | 
| 67 | 
             
                    try:
         | 
| 68 | 
             
                        on_num_shots_step_completed(
         | 
| 69 | 
            +
                            model_name_with_shorts, datasets["test"], predictions, results_path
         | 
|  | |
|  | |
| 70 | 
             
                        )
         | 
| 71 | 
             
                    except Exception as e:
         | 
| 72 | 
             
                        print(e)
         | 
| 73 |  | 
| 74 |  | 
| 75 | 
            +
            if __name__ == "__main__":
         | 
| 76 | 
            +
                evaluate_model_with_num_shots(
         | 
| 77 | 
            +
                    model_name,
         | 
| 78 | 
            +
                    data_path,
         | 
| 79 | 
            +
                    results_path=results_path,
         | 
| 80 | 
            +
                    max_new_tokens=max_new_tokens,
         | 
| 81 | 
            +
                )
         | 
    	
        llm_toolkit/translation_utils.py
    CHANGED
    
    | @@ -18,6 +18,7 @@ bleu = evaluate.load("bleu") | |
| 18 | 
             
            rouge = evaluate.load("rouge")
         | 
| 19 | 
             
            meteor = evaluate.load("meteor")
         | 
| 20 | 
             
            accuracy = evaluate.load("accuracy")
         | 
|  | |
| 21 |  | 
| 22 |  | 
| 23 | 
             
            def extract_answer(text, debug=False):
         | 
| @@ -54,6 +55,10 @@ def calc_metrics(references, predictions, debug=False): | |
| 54 | 
             
                    "meteor"
         | 
| 55 | 
             
                ]
         | 
| 56 |  | 
|  | |
|  | |
|  | |
|  | |
| 57 | 
             
                results["bleu_scores"] = bleu.compute(
         | 
| 58 | 
             
                    predictions=predictions, references=references, max_order=4
         | 
| 59 | 
             
                )
         | 
| @@ -108,7 +113,7 @@ def get_few_shot_prompt(dataset, num_shots=5): | |
| 108 | 
             
                return translation_prompt
         | 
| 109 |  | 
| 110 |  | 
| 111 | 
            -
            def load_translation_dataset(data_path, tokenizer=None, num_shots=0):
         | 
| 112 | 
             
                train_data_file = data_path.replace(".tsv", "-train.tsv")
         | 
| 113 | 
             
                test_data_file = data_path.replace(".tsv", "-test.tsv")
         | 
| 114 |  | 
| @@ -138,7 +143,7 @@ def load_translation_dataset(data_path, tokenizer=None, num_shots=0): | |
| 138 | 
             
                    delimiter="\t",
         | 
| 139 | 
             
                )
         | 
| 140 |  | 
| 141 | 
            -
                if tokenizer:
         | 
| 142 | 
             
                    translation_prompt = get_few_shot_prompt(datasets["train"], num_shots)
         | 
| 143 |  | 
| 144 | 
             
                    def formatting_prompts_func(examples):
         | 
| @@ -164,11 +169,23 @@ def load_translation_dataset(data_path, tokenizer=None, num_shots=0): | |
| 164 | 
             
                            prompt = translation_prompt.format(input=input)
         | 
| 165 | 
             
                            messages[-1] = {"role": "user", "content": prompt}
         | 
| 166 |  | 
| 167 | 
            -
                             | 
| 168 | 
            -
                                messages | 
| 169 | 
            -
             | 
| 170 | 
            -
             | 
| 171 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 172 | 
             
                        return {"text": texts, "prompt": prompts}
         | 
| 173 |  | 
| 174 | 
             
                    datasets = datasets.map(
         | 
| @@ -216,6 +233,11 @@ def detect_repetition_scores(row, col, debug=False): | |
| 216 | 
             
                )
         | 
| 217 |  | 
| 218 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 219 | 
             
            def get_metrics(df, max_output_tokens=2048, variant="rpp"):
         | 
| 220 | 
             
                metrics_df = pd.DataFrame(df.columns.T)[2:]
         | 
| 221 | 
             
                metrics_df.rename(columns={0: "model"}, inplace=True)
         | 
| @@ -235,12 +257,14 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"): | |
| 235 | 
             
                tokenizers = {model: load_tokenizer(model) for model in models}
         | 
| 236 |  | 
| 237 | 
             
                meteor = []
         | 
|  | |
| 238 | 
             
                bleu_1 = []
         | 
| 239 | 
             
                rouge_l = []
         | 
| 240 | 
             
                ews_score = []
         | 
| 241 | 
             
                repetition_score = []
         | 
| 242 | 
             
                total_repetitions = []
         | 
| 243 | 
             
                num_max_output_tokens = []
         | 
|  | |
| 244 | 
             
                columns = df.columns[2:]
         | 
| 245 |  | 
| 246 | 
             
                df[
         | 
| @@ -256,6 +280,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"): | |
| 256 | 
             
                    print(f"{col}: {metrics}")
         | 
| 257 |  | 
| 258 | 
             
                    meteor.append(metrics["meteor"])
         | 
|  | |
| 259 | 
             
                    bleu_1.append(metrics["bleu_scores"]["bleu"])
         | 
| 260 | 
             
                    rouge_l.append(metrics["rouge_scores"]["rougeL"])
         | 
| 261 |  | 
| @@ -273,6 +298,10 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"): | |
| 273 | 
             
                        lambda x: len(tokenizers[model](x)["input_ids"])
         | 
| 274 | 
             
                    )
         | 
| 275 |  | 
|  | |
|  | |
|  | |
|  | |
| 276 | 
             
                    new_col = f"output_tokens-{col}"
         | 
| 277 | 
             
                    df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
         | 
| 278 |  | 
| @@ -281,6 +310,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"): | |
| 281 | 
             
                    )
         | 
| 282 |  | 
| 283 | 
             
                metrics_df["meteor"] = meteor
         | 
|  | |
| 284 | 
             
                metrics_df["bleu_1"] = bleu_1
         | 
| 285 | 
             
                metrics_df["rouge_l"] = rouge_l
         | 
| 286 | 
             
                metrics_df["ews_score"] = ews_score
         | 
| @@ -290,6 +320,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"): | |
| 290 | 
             
                    lambda x: x["meteor"] / math.log10(10 + x["total_repetitions"]), axis=1
         | 
| 291 | 
             
                )
         | 
| 292 |  | 
|  | |
| 293 | 
             
                metrics_df["num_max_output_tokens"] = num_max_output_tokens
         | 
| 294 |  | 
| 295 | 
             
                if variant != "rpp":
         | 
| @@ -328,6 +359,12 @@ def analyze_translation_results(df, col, max_new_tokens=300, repetition_threshol | |
| 328 | 
             
                )
         | 
| 329 | 
             
                print_row_details(df2, range(len(df2)))
         | 
| 330 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 331 |  | 
| 332 | 
             
            def plot_metrics(metrics_df, figsize=(14, 5), ylim=(0, 0.44)):
         | 
| 333 | 
             
                plt.figure(figsize=figsize)
         | 
| @@ -604,3 +641,44 @@ def load_alpaca_data(data_path): | |
| 604 | 
             
                df_alpaca.to_json(alpaca_data_path, orient="records", lines=False, indent=2)
         | 
| 605 |  | 
| 606 | 
             
                return df_alpaca
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 18 | 
             
            rouge = evaluate.load("rouge")
         | 
| 19 | 
             
            meteor = evaluate.load("meteor")
         | 
| 20 | 
             
            accuracy = evaluate.load("accuracy")
         | 
| 21 | 
            +
            sacrebleu = evaluate.load("sacrebleu")
         | 
| 22 |  | 
| 23 |  | 
| 24 | 
             
            def extract_answer(text, debug=False):
         | 
|  | |
| 55 | 
             
                    "meteor"
         | 
| 56 | 
             
                ]
         | 
| 57 |  | 
| 58 | 
            +
                results["sacrebleu"] = sacrebleu.compute(
         | 
| 59 | 
            +
                    predictions=predictions, references=references
         | 
| 60 | 
            +
                )
         | 
| 61 | 
            +
             | 
| 62 | 
             
                results["bleu_scores"] = bleu.compute(
         | 
| 63 | 
             
                    predictions=predictions, references=references, max_order=4
         | 
| 64 | 
             
                )
         | 
|  | |
| 113 | 
             
                return translation_prompt
         | 
| 114 |  | 
| 115 |  | 
| 116 | 
            +
            def load_translation_dataset(data_path, tokenizer=None, num_shots=0, for_openai=False):
         | 
| 117 | 
             
                train_data_file = data_path.replace(".tsv", "-train.tsv")
         | 
| 118 | 
             
                test_data_file = data_path.replace(".tsv", "-test.tsv")
         | 
| 119 |  | 
|  | |
| 143 | 
             
                    delimiter="\t",
         | 
| 144 | 
             
                )
         | 
| 145 |  | 
| 146 | 
            +
                if tokenizer or for_openai:
         | 
| 147 | 
             
                    translation_prompt = get_few_shot_prompt(datasets["train"], num_shots)
         | 
| 148 |  | 
| 149 | 
             
                    def formatting_prompts_func(examples):
         | 
|  | |
| 169 | 
             
                            prompt = translation_prompt.format(input=input)
         | 
| 170 | 
             
                            messages[-1] = {"role": "user", "content": prompt}
         | 
| 171 |  | 
| 172 | 
            +
                            if for_openai:
         | 
| 173 | 
            +
                                prompts.append(messages.copy())
         | 
| 174 | 
            +
                                text = messages.copy()
         | 
| 175 | 
            +
                                text.append(
         | 
| 176 | 
            +
                                    {
         | 
| 177 | 
            +
                                        "role": "assistant",
         | 
| 178 | 
            +
                                        "content": output,
         | 
| 179 | 
            +
                                    }
         | 
| 180 | 
            +
                                )
         | 
| 181 | 
            +
                                texts.append(text)
         | 
| 182 | 
            +
                            else:
         | 
| 183 | 
            +
                                prompt = tokenizer.apply_chat_template(
         | 
| 184 | 
            +
                                    messages, tokenize=False, add_generation_prompt=True
         | 
| 185 | 
            +
                                )
         | 
| 186 | 
            +
                                prompts.append(prompt)
         | 
| 187 | 
            +
                                texts.append(prompt + output + tokenizer.eos_token)
         | 
| 188 | 
            +
             | 
| 189 | 
             
                        return {"text": texts, "prompt": prompts}
         | 
| 190 |  | 
| 191 | 
             
                    datasets = datasets.map(
         | 
|  | |
| 233 | 
             
                )
         | 
| 234 |  | 
| 235 |  | 
| 236 | 
            +
            def contains_chinese(text):
         | 
| 237 | 
            +
                chinese_char_pattern = re.compile(r"[\u4e00-\u9fff]")
         | 
| 238 | 
            +
                return 1 if chinese_char_pattern.search(text) else 0
         | 
| 239 | 
            +
             | 
| 240 | 
            +
             | 
| 241 | 
             
            def get_metrics(df, max_output_tokens=2048, variant="rpp"):
         | 
| 242 | 
             
                metrics_df = pd.DataFrame(df.columns.T)[2:]
         | 
| 243 | 
             
                metrics_df.rename(columns={0: "model"}, inplace=True)
         | 
|  | |
| 257 | 
             
                tokenizers = {model: load_tokenizer(model) for model in models}
         | 
| 258 |  | 
| 259 | 
             
                meteor = []
         | 
| 260 | 
            +
                spbleu = []
         | 
| 261 | 
             
                bleu_1 = []
         | 
| 262 | 
             
                rouge_l = []
         | 
| 263 | 
             
                ews_score = []
         | 
| 264 | 
             
                repetition_score = []
         | 
| 265 | 
             
                total_repetitions = []
         | 
| 266 | 
             
                num_max_output_tokens = []
         | 
| 267 | 
            +
                num_incomplete_translations = []
         | 
| 268 | 
             
                columns = df.columns[2:]
         | 
| 269 |  | 
| 270 | 
             
                df[
         | 
|  | |
| 280 | 
             
                    print(f"{col}: {metrics}")
         | 
| 281 |  | 
| 282 | 
             
                    meteor.append(metrics["meteor"])
         | 
| 283 | 
            +
                    spbleu.append(metrics["sacrebleu"]["score"])
         | 
| 284 | 
             
                    bleu_1.append(metrics["bleu_scores"]["bleu"])
         | 
| 285 | 
             
                    rouge_l.append(metrics["rouge_scores"]["rougeL"])
         | 
| 286 |  | 
|  | |
| 298 | 
             
                        lambda x: len(tokenizers[model](x)["input_ids"])
         | 
| 299 | 
             
                    )
         | 
| 300 |  | 
| 301 | 
            +
                    new_col = f"contains_chinese-{col}"
         | 
| 302 | 
            +
                    df[new_col] = df[col].apply(contains_chinese)
         | 
| 303 | 
            +
                    num_incomplete_translations.append(df[new_col].sum())
         | 
| 304 | 
            +
             | 
| 305 | 
             
                    new_col = f"output_tokens-{col}"
         | 
| 306 | 
             
                    df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
         | 
| 307 |  | 
|  | |
| 310 | 
             
                    )
         | 
| 311 |  | 
| 312 | 
             
                metrics_df["meteor"] = meteor
         | 
| 313 | 
            +
                metrics_df["spbleu"] = spbleu
         | 
| 314 | 
             
                metrics_df["bleu_1"] = bleu_1
         | 
| 315 | 
             
                metrics_df["rouge_l"] = rouge_l
         | 
| 316 | 
             
                metrics_df["ews_score"] = ews_score
         | 
|  | |
| 320 | 
             
                    lambda x: x["meteor"] / math.log10(10 + x["total_repetitions"]), axis=1
         | 
| 321 | 
             
                )
         | 
| 322 |  | 
| 323 | 
            +
                metrics_df["num_incomplete_translations"] = num_incomplete_translations
         | 
| 324 | 
             
                metrics_df["num_max_output_tokens"] = num_max_output_tokens
         | 
| 325 |  | 
| 326 | 
             
                if variant != "rpp":
         | 
|  | |
| 359 | 
             
                )
         | 
| 360 | 
             
                print_row_details(df2, range(len(df2)))
         | 
| 361 |  | 
| 362 | 
            +
                contains_chinese = f"contains_chinese-{col}"
         | 
| 363 | 
            +
                df3 = df[df[contains_chinese] > 0][["chinese", "english", col, contains_chinese]]
         | 
| 364 | 
            +
             | 
| 365 | 
            +
                print(f"\n*** Found {len(df3)} rows with incomplete translations for {col}")
         | 
| 366 | 
            +
                print_row_details(df3, range(len(df3)))
         | 
| 367 | 
            +
             | 
| 368 |  | 
| 369 | 
             
            def plot_metrics(metrics_df, figsize=(14, 5), ylim=(0, 0.44)):
         | 
| 370 | 
             
                plt.figure(figsize=figsize)
         | 
|  | |
| 641 | 
             
                df_alpaca.to_json(alpaca_data_path, orient="records", lines=False, indent=2)
         | 
| 642 |  | 
| 643 | 
             
                return df_alpaca
         | 
| 644 | 
            +
             | 
| 645 | 
            +
             | 
| 646 | 
            +
            def load_openai_training_data(
         | 
| 647 | 
            +
                data_path, openai_data_path="datasets/mac/openai-training.jsonl"
         | 
| 648 | 
            +
            ):
         | 
| 649 | 
            +
                if os.path.exists(openai_data_path):
         | 
| 650 | 
            +
                    print("loading existing data from:", openai_data_path)
         | 
| 651 | 
            +
                    data = pd.read_json(openai_data_path, orient="records", lines=True)
         | 
| 652 | 
            +
                    return data
         | 
| 653 | 
            +
             | 
| 654 | 
            +
                datasets = load_translation_dataset(data_path)
         | 
| 655 | 
            +
                prompt_template = get_few_shot_prompt(datasets["train"], num_shots=0)
         | 
| 656 | 
            +
             | 
| 657 | 
            +
                df_train = datasets["train"].to_pandas()
         | 
| 658 | 
            +
                messages = []
         | 
| 659 | 
            +
             | 
| 660 | 
            +
                for i, row in df_train.iterrows():
         | 
| 661 | 
            +
                    messages.append(
         | 
| 662 | 
            +
                        [
         | 
| 663 | 
            +
                            {
         | 
| 664 | 
            +
                                "role": "system",
         | 
| 665 | 
            +
                                "content": system_prompt,
         | 
| 666 | 
            +
                            },
         | 
| 667 | 
            +
                            {
         | 
| 668 | 
            +
                                "role": "user",
         | 
| 669 | 
            +
                                "content": prompt_template.format(input=row["chinese"]),
         | 
| 670 | 
            +
                            },
         | 
| 671 | 
            +
                            {
         | 
| 672 | 
            +
                                "role": "assistant",
         | 
| 673 | 
            +
                                "content": row["english"],
         | 
| 674 | 
            +
                            },
         | 
| 675 | 
            +
                        ]
         | 
| 676 | 
            +
                    )
         | 
| 677 | 
            +
             | 
| 678 | 
            +
                df_openai = pd.DataFrame(
         | 
| 679 | 
            +
                    {
         | 
| 680 | 
            +
                        "messages": messages,
         | 
| 681 | 
            +
                    }
         | 
| 682 | 
            +
                )
         | 
| 683 | 
            +
                df_openai.to_json(openai_data_path, orient="records", lines=True)
         | 
| 684 | 
            +
                return df_openai
         | 
    	
        logs/{l40-1gpu-rpp.txt → l40-1gpu-rpp-1.txt}
    RENAMED
    
    | 
            File without changes
         | 
    	
        logs/l40-4gpu-1.txt
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        logs/l40-4gpu.txt
    DELETED
    
    | @@ -1,3 +0,0 @@ | |
| 1 | 
            -
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256:289a8bbbf208650bc4a0cc3b86578f8a7db73ef68bbefa3c55c3eedf94a38ed0
         | 
| 3 | 
            -
            size 878270
         | 
|  | |
|  | |
|  | |
|  | 
    	
        logs/openai-gpt-4o-mini-fine-tuned.txt
    ADDED
    
    | @@ -0,0 +1,151 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            Qwen/Qwen2-7B-Instruct None False datasets/mac/mac.tsv results/mac-results.csv False 300
         | 
| 2 | 
            +
            loading env vars from: /Users/inflaton/code/engd/papers/rapget-translation/.env
         | 
| 3 | 
            +
            workding dir: /Users/inflaton/code/engd/papers/rapget-translation
         | 
| 4 | 
            +
            Python 3.11.9
         | 
| 5 | 
            +
            Name: torch
         | 
| 6 | 
            +
            Version: 2.4.0
         | 
| 7 | 
            +
            Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
         | 
| 8 | 
            +
            Home-page: https://pytorch.org/
         | 
| 9 | 
            +
            Author: PyTorch Team
         | 
| 10 | 
            +
            Author-email: packages@pytorch.org
         | 
| 11 | 
            +
            License: BSD-3
         | 
| 12 | 
            +
            Location: /Users/inflaton/anaconda3/envs/rapget/lib/python3.11/site-packages
         | 
| 13 | 
            +
            Requires: filelock, fsspec, jinja2, networkx, sympy, typing-extensions
         | 
| 14 | 
            +
            Required-by: accelerate, peft, torchaudio, torchvision, trl
         | 
| 15 | 
            +
            ---
         | 
| 16 | 
            +
            Name: transformers
         | 
| 17 | 
            +
            Version: 4.43.3
         | 
| 18 | 
            +
            Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
         | 
| 19 | 
            +
            Home-page: https://github.com/huggingface/transformers
         | 
| 20 | 
            +
            Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
         | 
| 21 | 
            +
            Author-email: transformers@huggingface.co
         | 
| 22 | 
            +
            License: Apache 2.0 License
         | 
| 23 | 
            +
            Location: /Users/inflaton/anaconda3/envs/rapget/lib/python3.11/site-packages
         | 
| 24 | 
            +
            Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
         | 
| 25 | 
            +
            Required-by: llamafactory, peft, trl
         | 
| 26 | 
            +
            CPU times: user 8.97 ms, sys: 13.7 ms, total: 22.7 ms
         | 
| 27 | 
            +
            Wall time: 1.91 s
         | 
| 28 | 
            +
            MPS is available
         | 
| 29 | 
            +
            loading existing data from: logs/openai-training-sample.jsonl
         | 
| 30 | 
            +
            messages
         | 
| 31 | 
            +
            0	[{'role': 'system', 'content': 'Marv is a fact...
         | 
| 32 | 
            +
            1	[{'role': 'system', 'content': 'Marv is a fact...
         | 
| 33 | 
            +
            2	[{'role': 'system', 'content': 'Marv is a fact...
         | 
| 34 | 
            +
            FileObject(id='file-IokPHn4YWcniXL4wGnK4xVmn', bytes=3413094, created_at=1723269681, filename='openai-training.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)
         | 
| 35 | 
            +
            FineTuningJob(id='ftjob-TcCo4KtDd3Gp5cnOVky2Rxhh', created_at=1723270136, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=6, batch_size='auto', learning_rate_multiplier='auto'), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-RXHVnD8cqPvqTPdXgZ5rQdl3', result_files=[], seed=1046194933, status='validating_files', trained_tokens=None, training_file='file-IokPHn4YWcniXL4wGnK4xVmn', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
         | 
| 36 | 
            +
            FineTuningJob(id='ftjob-TcCo4KtDd3Gp5cnOVky2Rxhh', created_at=1723270136, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:mastercard::9uaCEFTs', finished_at=1723272532, hyperparameters=Hyperparameters(n_epochs=6, batch_size=18, learning_rate_multiplier=1.8), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-RXHVnD8cqPvqTPdXgZ5rQdl3', result_files=['file-aCppW0GWhhytwe4yKwymNUZl'], seed=1046194933, status='succeeded', trained_tokens=3640956, training_file='file-IokPHn4YWcniXL4wGnK4xVmn', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)
         | 
| 37 | 
            +
            Evaluating model: ft:gpt-4o-mini-2024-07-18:mastercard::9ufuULvy
         | 
| 38 | 
            +
            loading train/test data files
         | 
| 39 | 
            +
            DatasetDict({
         | 
| 40 | 
            +
                train: Dataset({
         | 
| 41 | 
            +
                    features: ['chinese', 'english'],
         | 
| 42 | 
            +
                    num_rows: 4528
         | 
| 43 | 
            +
                })
         | 
| 44 | 
            +
                test: Dataset({
         | 
| 45 | 
            +
                    features: ['chinese', 'english'],
         | 
| 46 | 
            +
                    num_rows: 1133
         | 
| 47 | 
            +
                })
         | 
| 48 | 
            +
            })
         | 
| 49 | 
            +
            --------------------------------------------------
         | 
| 50 | 
            +
            chinese: 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。
         | 
| 51 | 
            +
            --------------------------------------------------
         | 
| 52 | 
            +
            english: Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.
         | 
| 53 | 
            +
            *** Evaluating with num_shots: 0
         | 
| 54 | 
            +
            100%|██████████| 1133/1133 [16:48<00:00,  1.12it/s]
         | 
| 55 | 
            +
            gpt-4o-mini/epochs-01 metrics: {'meteor': 0.3785370331806402, 'sacrebleu': {'score': 12.052844230027103, 'counts': [12818, 4623, 2153, 1081], 'totals': [29097, 27964, 26850, 25740], 'precisions': [44.05265147609719, 16.53196967529681, 8.018621973929237, 4.1996891996892], 'bp': 0.9631327655852462, 'sys_len': 29097, 'ref_len': 30190}, 'bleu_scores': {'bleu': 0.12052844230027103, 'precisions': [0.44052651476097193, 0.1653196967529681, 0.08018621973929237, 0.041996891996891994], 'brevity_penalty': 0.9631327655852462, 'length_ratio': 0.9637959589267969, 'translation_length': 29097, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4244007719128182, 'rouge2': 0.17601540674784633, 'rougeL': 0.3693615986543504, 'rougeLsum': 0.3696442718692141}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
         | 
| 56 | 
            +
            Evaluating model: ft:gpt-4o-mini-2024-07-18:mastercard::9ug0Gt3w
         | 
| 57 | 
            +
            loading train/test data files
         | 
| 58 | 
            +
            DatasetDict({
         | 
| 59 | 
            +
                train: Dataset({
         | 
| 60 | 
            +
                    features: ['chinese', 'english'],
         | 
| 61 | 
            +
                    num_rows: 4528
         | 
| 62 | 
            +
                })
         | 
| 63 | 
            +
                test: Dataset({
         | 
| 64 | 
            +
                    features: ['chinese', 'english'],
         | 
| 65 | 
            +
                    num_rows: 1133
         | 
| 66 | 
            +
                })
         | 
| 67 | 
            +
            })
         | 
| 68 | 
            +
            --------------------------------------------------
         | 
| 69 | 
            +
            chinese: 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。
         | 
| 70 | 
            +
            --------------------------------------------------
         | 
| 71 | 
            +
            english: Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.
         | 
| 72 | 
            +
            *** Evaluating with num_shots: 0
         | 
| 73 | 
            +
            100%|██████████| 1133/1133 [17:56<00:00,  1.05it/s]
         | 
| 74 | 
            +
            gpt-4o-mini/epochs-02 metrics: {'meteor': 0.3785921332515917, 'sacrebleu': {'score': 12.033706874864837, 'counts': [12801, 4628, 2150, 1076], 'totals': [29076, 27943, 26830, 25722], 'precisions': [44.02600082542303, 16.562287513867517, 8.013417815877748, 4.183189487598165], 'bp': 0.9624112877781842, 'sys_len': 29076, 'ref_len': 30190}, 'bleu_scores': {'bleu': 0.12033706874864836, 'precisions': [0.4402600082542303, 0.16562287513867516, 0.08013417815877749, 0.04183189487598165], 'brevity_penalty': 0.9624112877781842, 'length_ratio': 0.9631003643590593, 'translation_length': 29076, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4235104923203792, 'rouge2': 0.1758318317686482, 'rougeL': 0.36922125683186846, 'rougeLsum': 0.3693808162149962}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
         | 
| 75 | 
            +
            Evaluating model: ft:gpt-4o-mini-2024-07-18:mastercard::9ug5PhpZ
         | 
| 76 | 
            +
            loading train/test data files
         | 
| 77 | 
            +
            DatasetDict({
         | 
| 78 | 
            +
                train: Dataset({
         | 
| 79 | 
            +
                    features: ['chinese', 'english'],
         | 
| 80 | 
            +
                    num_rows: 4528
         | 
| 81 | 
            +
                })
         | 
| 82 | 
            +
                test: Dataset({
         | 
| 83 | 
            +
                    features: ['chinese', 'english'],
         | 
| 84 | 
            +
                    num_rows: 1133
         | 
| 85 | 
            +
                })
         | 
| 86 | 
            +
            })
         | 
| 87 | 
            +
            --------------------------------------------------
         | 
| 88 | 
            +
            chinese: 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。
         | 
| 89 | 
            +
            --------------------------------------------------
         | 
| 90 | 
            +
            english: Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.
         | 
| 91 | 
            +
            *** Evaluating with num_shots: 0
         | 
| 92 | 
            +
            100%|██████████| 1133/1133 [17:02<00:00,  1.11it/s]
         | 
| 93 | 
            +
            gpt-4o-mini/epochs-03 metrics: {'meteor': 0.37736228106121694, 'sacrebleu': {'score': 11.933111335430906, 'counts': [12779, 4601, 2124, 1061], 'totals': [29096, 27963, 26848, 25737], 'precisions': [43.920126477866376, 16.453885491542394, 7.911203814064362, 4.122469596301046], 'bp': 0.9630984208616785, 'sys_len': 29096, 'ref_len': 30190}, 'bleu_scores': {'bleu': 0.11933111335430906, 'precisions': [0.4392012647786637, 0.16453885491542394, 0.07911203814064362, 0.041224695963010455], 'brevity_penalty': 0.9630984208616785, 'length_ratio': 0.9637628353759523, 'translation_length': 29096, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4235319934194407, 'rouge2': 0.17493309683581332, 'rougeL': 0.3685697120399035, 'rougeLsum': 0.3689298428303013}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
         | 
| 94 | 
            +
            Evaluating model: ft:gpt-4o-mini-2024-07-18:mastercard::9ugPThQI
         | 
| 95 | 
            +
            loading train/test data files
         | 
| 96 | 
            +
            DatasetDict({
         | 
| 97 | 
            +
                train: Dataset({
         | 
| 98 | 
            +
                    features: ['chinese', 'english'],
         | 
| 99 | 
            +
                    num_rows: 4528
         | 
| 100 | 
            +
                })
         | 
| 101 | 
            +
                test: Dataset({
         | 
| 102 | 
            +
                    features: ['chinese', 'english'],
         | 
| 103 | 
            +
                    num_rows: 1133
         | 
| 104 | 
            +
                })
         | 
| 105 | 
            +
            })
         | 
| 106 | 
            +
            --------------------------------------------------
         | 
| 107 | 
            +
            chinese: 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。
         | 
| 108 | 
            +
            --------------------------------------------------
         | 
| 109 | 
            +
            english: Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.
         | 
| 110 | 
            +
            *** Evaluating with num_shots: 0
         | 
| 111 | 
            +
            100%|██████████| 1133/1133 [18:35<00:00,  1.02it/s]
         | 
| 112 | 
            +
            gpt-4o-mini/epochs-04 metrics: {'meteor': 0.37818535038887346, 'sacrebleu': {'score': 11.933285526593995, 'counts': [12797, 4601, 2121, 1061], 'totals': [29110, 27977, 26861, 25749], 'precisions': [43.960838199931295, 16.445651785395146, 7.896206395889952, 4.120548370810517], 'bp': 0.9635791436286372, 'sys_len': 29110, 'ref_len': 30190}, 'bleu_scores': {'bleu': 0.11933285526593994, 'precisions': [0.43960838199931296, 0.16445651785395146, 0.07896206395889951, 0.041205483708105166], 'brevity_penalty': 0.9635791436286371, 'length_ratio': 0.9642265650877774, 'translation_length': 29110, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42372801674771476, 'rouge2': 0.17487358435014705, 'rougeL': 0.36931437347367646, 'rougeLsum': 0.36934766241132383}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
         | 
| 113 | 
            +
             | 
| 114 | 
            +
            Evaluating model: ft:gpt-4o-mini-2024-07-18:mastercard::9ugVLmcB
         | 
| 115 | 
            +
            loading train/test data files
         | 
| 116 | 
            +
            DatasetDict({
         | 
| 117 | 
            +
                train: Dataset({
         | 
| 118 | 
            +
                    features: ['chinese', 'english'],
         | 
| 119 | 
            +
                    num_rows: 4528
         | 
| 120 | 
            +
                })
         | 
| 121 | 
            +
                test: Dataset({
         | 
| 122 | 
            +
                    features: ['chinese', 'english'],
         | 
| 123 | 
            +
                    num_rows: 1133
         | 
| 124 | 
            +
                })
         | 
| 125 | 
            +
            })
         | 
| 126 | 
            +
            --------------------------------------------------
         | 
| 127 | 
            +
            chinese: 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。
         | 
| 128 | 
            +
            --------------------------------------------------
         | 
| 129 | 
            +
            english: Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.
         | 
| 130 | 
            +
            *** Evaluating with num_shots: 0
         | 
| 131 | 
            +
            100%|██████████| 1133/1133 [15:47<00:00,  1.20it/s]
         | 
| 132 | 
            +
            gpt-4o-mini/epochs-05 metrics: {'meteor': 0.3790673551140706, 'sacrebleu': {'score': 11.955698498650582, 'counts': [12808, 4609, 2126, 1064], 'totals': [29209, 28076, 26959, 25846], 'precisions': [43.849498442260945, 16.416156147599374, 7.88604918580066, 4.116691170780778], 'bp': 0.9669721941455759, 'sys_len': 29209, 'ref_len': 30190}, 'bleu_scores': {'bleu': 0.11955698498650584, 'precisions': [0.4384949844226095, 0.16416156147599373, 0.0788604918580066, 0.041166911707807785], 'brevity_penalty': 0.9669721941455759, 'length_ratio': 0.9675057966213978, 'translation_length': 29209, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42476082012412075, 'rouge2': 0.17559955520032905, 'rougeL': 0.3700113513462385, 'rougeLsum': 0.37012014201963733}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
         | 
| 133 | 
            +
            Evaluating model: ft:gpt-4o-mini-2024-07-18:mastercard::9uaCEFTs
         | 
| 134 | 
            +
            loading train/test data files
         | 
| 135 | 
            +
            DatasetDict({
         | 
| 136 | 
            +
                train: Dataset({
         | 
| 137 | 
            +
                    features: ['chinese', 'english'],
         | 
| 138 | 
            +
                    num_rows: 4528
         | 
| 139 | 
            +
                })
         | 
| 140 | 
            +
                test: Dataset({
         | 
| 141 | 
            +
                    features: ['chinese', 'english'],
         | 
| 142 | 
            +
                    num_rows: 1133
         | 
| 143 | 
            +
                })
         | 
| 144 | 
            +
            })
         | 
| 145 | 
            +
            --------------------------------------------------
         | 
| 146 | 
            +
            chinese: 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。
         | 
| 147 | 
            +
            --------------------------------------------------
         | 
| 148 | 
            +
            english: Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.
         | 
| 149 | 
            +
            *** Evaluating with num_shots: 0
         | 
| 150 | 
            +
            100%|██████████| 1133/1133 [15:43<00:00,  1.20it/s]
         | 
| 151 | 
            +
            gpt-4o-mini/epochs-06 metrics: {'meteor': 0.3792226866395673, 'sacrebleu': {'score': 11.982811850915233, 'counts': [12810, 4617, 2137, 1066], 'totals': [29116, 27983, 26868, 25757], 'precisions': [43.996428080780326, 16.499303148340065, 7.95369956825964, 4.138680746981403], 'bp': 0.9637850995333245, 'sys_len': 29116, 'ref_len': 30190}, 'bleu_scores': {'bleu': 0.11982811850915229, 'precisions': [0.43996428080780325, 0.16499303148340064, 0.0795369956825964, 0.04138680746981403], 'brevity_penalty': 0.9637850995333245, 'length_ratio': 0.9644253063928453, 'translation_length': 29116, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4251187202203103, 'rouge2': 0.17553224521896635, 'rougeL': 0.37003282393672954, 'rougeLsum': 0.370114181474168}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
         | 
    	
        logs/openai-training-sample.jsonl
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]}
         | 
| 2 | 
            +
            {"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}
         | 
| 3 | 
            +
            {"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "How far is the Moon from Earth?"}, {"role": "assistant", "content": "Around 384,400 kilometers. Give or take a few, like that really matters."}]}
         | 
    	
        notebooks/00b_Data Analysis_Few_Shots.ipynb
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:4d7af735cc4e9c4395e8bba07f2d212edc159f0f8f8cf83281f25f1ea8c2d717
         | 
| 3 | 
            +
            size 3332561
         | 
    	
        notebooks/00c_Data Analysis_Fine_Tuned.ipynb
    CHANGED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        notebooks/00d_Data Analysis_Fine_Tuned_RPP.ipynb
    CHANGED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        notebooks/02_Fine_Tune_OpenAI.ipynb
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        requirements.txt
    CHANGED
    
    | @@ -18,3 +18,4 @@ sentencepiece==0.2.0 | |
| 18 | 
             
            einops==0.8.0
         | 
| 19 | 
             
            accelerate==0.32.0
         | 
| 20 | 
             
            peft==0.11.1
         | 
|  | 
|  | |
| 18 | 
             
            einops==0.8.0
         | 
| 19 | 
             
            accelerate==0.32.0
         | 
| 20 | 
             
            peft==0.11.1
         | 
| 21 | 
            +
            sacrebleu==2.4.2
         | 
    	
        results/mac-results_few_shots_metrics.csv
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:945e48d5773ce3a870e793e410c79148bd34c1b427c7bcd8e9e5ec140e574fa7
         | 
| 3 | 
            +
            size 9379
         | 
    	
        results/mac-results_few_shots_openai.csv
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:eea324569b30d1696a51853cbfc5f7b992a569f464cae0db7a88a38c8024578a
         | 
| 3 | 
            +
            size 2782816
         | 
    	
        results/mac-results_fine_tuned_metrics.csv
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:bbf8e7661be99195444d1c6985179880efdc065fc1856c5fe5a78de14906c064
         | 
| 3 | 
            +
            size 8321
         | 
    	
        scripts/eval-4gpu.sh
    CHANGED
    
    | @@ -16,11 +16,9 @@ grep MemTotal /proc/meminfo | |
| 16 | 
             
            #pip install -r requirements.txt
         | 
| 17 |  | 
| 18 | 
             
            export BATCH_SIZE=1
         | 
| 19 | 
            -
            # export START_REPETITION_PENALTY=1.06
         | 
| 20 | 
            -
            export START_NUM_SHOTS=50
         | 
| 21 |  | 
| 22 | 
             
            #./scripts/eval-model.sh Qwen/Qwen2-72B-Instruct
         | 
| 23 |  | 
| 24 | 
            -
            ./scripts/eval-model.sh shenzhi-wang/Llama3.1-70B-Chinese-Chat
         | 
| 25 |  | 
| 26 | 
            -
             | 
|  | |
| 16 | 
             
            #pip install -r requirements.txt
         | 
| 17 |  | 
| 18 | 
             
            export BATCH_SIZE=1
         | 
|  | |
|  | |
| 19 |  | 
| 20 | 
             
            #./scripts/eval-model.sh Qwen/Qwen2-72B-Instruct
         | 
| 21 |  | 
| 22 | 
            +
            # ./scripts/eval-model.sh shenzhi-wang/Llama3.1-70B-Chinese-Chat
         | 
| 23 |  | 
| 24 | 
            +
            ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-70B-Chinese-Chat checkpoint-280
         |