File size: 5,226 Bytes
3a7aaed a0888ca 3a7aaed a0888ca 3a7aaed a0888ca 3a7aaed a0888ca 3a7aaed a0888ca 3a7aaed a0888ca 3a7aaed a0888ca 3a7aaed a0888ca 3a7aaed a0888ca 3a7aaed a0888ca 3a7aaed a0888ca 3a7aaed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import json\n",
"from phoenix.client import Client\n",
"\n",
"# Pull every recorded span from the running Phoenix instance into a DataFrame.\n",
"# NOTE(review): the project name \"default\" is hardcoded — confirm it matches\n",
"# the project the agent traces were actually logged to.\n",
"spans_df = Client().spans.get_spans_dataframe(project_name=\"default\")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# Load the ground-truth dataset (JSON Lines: one record per line).\n",
"# This is the source of truth the agent answers are scored against;\n",
"# its \"Question\" and \"Final answer\" columns are used downstream.\n",
"dataset_df = pd.read_json(\"../data/metadata.jsonl\", lines=True)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Keep only successful top-level agent runs: root spans (no parent) of kind\n",
"# AGENT whose status is OK.\n",
"# .copy() detaches the result from spans_df so later column assignments\n",
"# (e.g. agents_df[\"task\"] = ... in the next cell) operate on an independent\n",
"# frame instead of a view, avoiding pandas' SettingWithCopyWarning.\n",
"agents_df = spans_df[(spans_df.span_kind == 'AGENT') & (spans_df.parent_id.isna()) & (spans_df.status_code == 'OK')].copy()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# Recover the original question: parse the span's input JSON, take its\n",
"# \"task\" field, and strip the trailing file-download note so the text matches\n",
"# the dataset's Question column exactly. (\"mentionned\" [sic] deliberately\n",
"# matches the spelling used in the task text itself — do not correct it.)\n",
"# .assign returns a NEW frame, so this never writes through a view of\n",
"# spans_df and cannot raise SettingWithCopyWarning.\n",
"agents_df = agents_df.assign(task=agents_df[\"attributes.input.value\"].apply(json.loads).apply(lambda x : x[\"task\"]).str.replace(r'\\s*The mentionned file can be downloaded from.*$', '', regex=True))\n",
"# Left-join each agent run with its ground-truth row from the dataset.\n",
"agents_merged_df = pd.merge(agents_df, dataset_df, how=\"left\", left_on=\"task\", right_on=\"Question\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"from phoenix.evals.evaluators import bind_evaluator, async_evaluate_dataframe\n",
"from evaluators import conciseness_evaluator\n",
"from scorer import question_scorer_wrapper as question_scorer\n",
"\n",
"# Bind each evaluator to the columns it reads: the agent's answer\n",
"# (attributes.output.value) and the ground truth (Final answer).\n",
"# Bound to a fresh name instead of shadowing the imported\n",
"# conciseness_evaluator, so re-running this cell stays idempotent.\n",
"conciseness_eval = bind_evaluator(evaluator=conciseness_evaluator, input_mapping={ \"output\": \"attributes.output.value\", \"expected\": \"Final answer\"})\n",
"question_scorer_eval = bind_evaluator(evaluator=question_scorer, input_mapping={ \"output\": \"attributes.output.value\", \"expected\": \"Final answer\"})\n",
"# Fix: the evaluators list previously included `exact_match_eval`, which is\n",
"# never defined anywhere in this notebook and raised a NameError on a fresh\n",
"# Restart-&-Run-All. It has been removed.\n",
"results_df = await async_evaluate_dataframe(agents_merged_df, evaluators=[conciseness_eval, question_scorer_eval])\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# Unpack the evaluator outputs (JSON strings) into flat columns:\n",
"# conciseness keeps the categorical label, question_scorer the numeric score.\n",
"results_df[\"conciseness\"] = results_df.conciseness_evaluator_score.apply(json.loads).apply(lambda x : x[\"label\"])\n",
"results_df[\"question_scorer\"] = results_df.question_scorer_score.apply(json.loads).apply(lambda x : x[\"score\"])\n",
"# Tag each run by orchestration style: runs whose smolagents attributes\n",
"# mention \"managed_agents\" came from the multi-agent setup, the rest from\n",
"# the single LLM agent.\n",
"results_df[\"agent_type\"] = results_df[\"attributes.smolagents\"].apply(lambda x : \"multi_agent\" if \"managed_agents\" in x else \"llm_agent\")\n",
"# Keep only the columns needed for the annotation upload in the next cell.\n",
"results_filtered_df = results_df[[\"name\", \"span_kind\", \"start_time\", \"context.span_id\", \"context.trace_id\",\"attributes.output.value\", \"task_id\", \"Question\", \"Final answer\", \"agent_type\", \"conciseness_evaluator_score\", \"question_scorer_score\", \"conciseness\", \"question_scorer\"]]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/romainfayoux/Documents/Programmation/Final_Assignment_Template/.venv/lib/python3.12/site-packages/phoenix/evals/utils.py:367: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
"  result_df = pd.concat(result_dfs, ignore_index=True)\n"
]
}
],
"source": [
"# Upload results\n",
"import numpy as np\n",
"from phoenix.evals.utils import to_annotation_dataframe\n",
"\n",
"# Reshape the per-row evaluation results into Phoenix's span-annotation format.\n",
"annotation_df = to_annotation_dataframe(results_filtered_df)\n",
"# Replace NaN with None — presumably so missing values serialize as JSON\n",
"# null in the annotation payload; TODO confirm the API requires this.\n",
"annotation_df = annotation_df.replace({np.nan: None})\n",
"Client().spans.log_span_annotations_dataframe(dataframe=annotation_df)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Final_Assignment_Template",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|