feat: add humaneval parser
- llmdataparser/humaneval_parser.py +145 -0
- tests/test_humaneval_parser.py +173 -0
llmdataparser/humaneval_parser.py
ADDED
@@ -0,0 +1,145 @@
+from dataclasses import dataclass
+from typing import Any, ClassVar
+
+from llmdataparser.base_parser import HuggingFaceDatasetParser, HuggingFaceParseEntry
+from llmdataparser.prompts import HUMANEVAL_SYSTEM_PROMPT
+
+
+@dataclass(frozen=True, kw_only=True, slots=True)
+class HumanEvalParseEntry(HuggingFaceParseEntry):
+    """Custom entry class for HumanEval, with fields specific to this dataset parser."""
+
+    task_id: str
+    task_name: str
+    entry_point: str
+    test: str
+
+    @classmethod
+    def create(
+        cls,
+        prompt: str,
+        answer: str,
+        raw_question: str,
+        task_id: str,
+        entry_point: str,
+        test: str,
+        task_name: str,
+    ) -> "HumanEvalParseEntry":
+        if not task_id:
+            raise ValueError("Task ID cannot be empty")
+        if not entry_point:
+            raise ValueError("Entry point cannot be empty")
+        return cls(
+            prompt=prompt,
+            answer=answer,
+            raw_question=raw_question,
+            raw_answer=answer,  # In HumanEval, the canonical solution is the raw answer
+            task_id=task_id,
+            entry_point=entry_point,
+            test=test,
+            task_name=task_name,
+        )
+
+
+class HumanEvalDatasetParser(HuggingFaceDatasetParser[HumanEvalParseEntry]):
+    """Parser for the HumanEval dataset."""
+
+    _data_source: ClassVar[str] = "openai/openai_humaneval"
+    _default_task: ClassVar[str] = "openai_humaneval"
+    _task_names: ClassVar[list[str]] = ["openai_humaneval"]
+    _default_system_prompt: ClassVar[str] = HUMANEVAL_SYSTEM_PROMPT
+
+    def process_entry(
+        self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
+    ) -> HumanEvalParseEntry:
+        """Process a single HumanEval entry."""
+        raw_question = row["prompt"]
+        answer = row["canonical_solution"]
+        task_id = row["task_id"]
+        entry_point = row["entry_point"]
+        test = row["test"]
+
+        # Combine system prompt with the function signature and docstring
+        prompt = f"{self._system_prompt}\n\n{raw_question}"
+
+        # Use task_name if provided, otherwise use default
+        task = task_name or self._get_current_task(row)
+
+        return HumanEvalParseEntry.create(
+            prompt=prompt,
+            answer=answer,
+            raw_question=raw_question,
+            task_id=task_id,
+            entry_point=entry_point,
+            test=test,
+            task_name=task,  # Guaranteed non-None
+        )
+
+
+class HumanEvalDatasetPlusParser(HumanEvalDatasetParser):
+    """Parser for the HumanEval Plus (EvalPlus) dataset."""
+
+    _data_source: ClassVar[str] = "evalplus/humanevalplus"
+    _default_task: ClassVar[str] = "default"
+    _task_names: ClassVar[list[str]] = ["default"]
+    _default_system_prompt: ClassVar[str] = HUMANEVAL_SYSTEM_PROMPT
+
+    def process_entry(
+        self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
+    ) -> HumanEvalParseEntry:
+        """Process a single HumanEval Plus entry."""
+        raw_question = row["prompt"]
+        answer = row["canonical_solution"]
+        task_id = row["task_id"]
+        entry_point = row["entry_point"]
+        test = row["test"]
+
+        # Combine system prompt with the function signature and docstring
+        prompt = f"{self._system_prompt}\n\n{raw_question}"
+
+        # Use task_name if provided, otherwise use default
+        task = task_name or self._get_current_task(row)
+
+        return HumanEvalParseEntry.create(
+            prompt=prompt,
+            answer=answer,
+            raw_question=raw_question,
+            task_id=task_id,
+            entry_point=entry_point,
+            test=test,
+            task_name=task,  # task is guaranteed to be str from _get_current_task
+        )
+
+
+if __name__ == "__main__":
+    # Example usage
+    parser = HumanEvalDatasetParser()
+
+    # Load the dataset
+    parser.load()
+
+    # Parse all splits
+    parser.parse()
+
+    # Get parsed data
+    parsed_data = parser.get_parsed_data
+
+    # Print example entry
+    if parsed_data:
+        example = parsed_data[0]
+        print("\nExample parsed entry:")
+        print(f"Task ID: {example.task_id}")
+        print(f"Entry Point: {example.entry_point}")
+        print(f"Prompt:\n{example.prompt}")
+        print(f"Solution:\n{example.answer}")
+
+    parser = HumanEvalDatasetPlusParser()
+    parser.load()
+    parser.parse()
+    parsed_data = parser.get_parsed_data
+    if parsed_data:
+        example = parsed_data[0]
+        print("\nExample parsed entry:")
+        print(f"Task: {example.task_name}")
+        print(f"Question: {example.raw_question}")
+        print(f"Correct Answer: {example.answer}")
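
A quick way to sanity-check the parsing logic without downloading the dataset is to pass a single raw row straight to `process_entry`. This is a minimal sketch, assuming the base parser initializes `_system_prompt` from `_default_system_prompt` at construction time (the unit tests below rely on the same behavior); the row shape is copied from the test fixtures:

    from llmdataparser.humaneval_parser import HumanEvalDatasetParser

    # A raw row with the five fields process_entry reads (shape copied from the test fixtures)
    row = {
        "prompt": 'def add(a, b):\n    """Add two numbers."""\n',
        "canonical_solution": "def add(a, b):\n    return a + b\n",
        "task_id": "HumanEval/0",
        "entry_point": "add",
        "test": "def test_add(): assert add(2, 3) == 5",
    }

    parser = HumanEvalDatasetParser()
    entry = parser.process_entry(row, task_name="openai_humaneval")
    print(entry.task_id, entry.entry_point)  # HumanEval/0 add
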
tests/test_humaneval_parser.py
ADDED
@@ -0,0 +1,173 @@
+import pytest
+
+from llmdataparser.humaneval_parser import (
+    HumanEvalDatasetParser,
+    HumanEvalDatasetPlusParser,
+    HumanEvalParseEntry,
+)
+
+
+@pytest.fixture
+def sample_entry():
+    return {
+        "prompt": 'def add(a, b):\n    """Add two numbers."""\n',
+        "canonical_solution": "def add(a, b):\n    return a + b\n",
+        "task_id": "HumanEval/0",
+        "entry_point": "add",
+        "test": "def test_add(): assert add(2, 3) == 5",
+    }
+
+
+@pytest.fixture
+def parser():
+    return HumanEvalDatasetParser()
+
+
+@pytest.fixture
+def plus_parser():
+    return HumanEvalDatasetPlusParser()
+
+
+@pytest.fixture
+def plus_sample_entry():
+    return {
+        "prompt": 'def add(a, b):\n    """Add two numbers."""\n',
+        "canonical_solution": "def add(a, b):\n    return a + b\n",
+        "task_id": "HumanEval/0",
+        "entry_point": "add",
+        "test": "def test_add(): assert add(2, 3) == 5",
+    }
+
+
+def test_humaneval_parse_entry_creation():
+    """Test creation of HumanEvalParseEntry"""
+    entry = HumanEvalParseEntry.create(
+        prompt="test prompt",
+        answer="test answer",
+        raw_question="raw question",
+        task_id="HumanEval/1",
+        entry_point="test_func",
+        test="test case",
+        task_name="openai_humaneval",
+    )
+
+    assert entry.prompt == "test prompt"
+    assert entry.answer == "test answer"
+    assert entry.raw_question == "raw question"
+    assert entry.raw_answer == "test answer"  # Should match answer
+    assert entry.task_id == "HumanEval/1"
+    assert entry.entry_point == "test_func"
+    assert entry.test == "test case"
+    assert entry.task_name == "openai_humaneval"
+
+
+def test_humaneval_parse_entry_validation():
+    """Test validation of required fields"""
+    with pytest.raises(ValueError, match="Task ID cannot be empty"):
+        HumanEvalParseEntry.create(
+            prompt="test",
+            answer="test",
+            raw_question="test",
+            task_id="",  # Empty task_id should raise error
+            entry_point="test",
+            test="test",
+            task_name="test",
+        )
+
+    with pytest.raises(ValueError, match="Entry point cannot be empty"):
+        HumanEvalParseEntry.create(
+            prompt="test",
+            answer="test",
+            raw_question="test",
+            task_id="test",
+            entry_point="",  # Empty entry_point should raise error
+            test="test",
+            task_name="test",
+        )
+
+
+def test_process_entry(parser, sample_entry):
+    """Test processing of a single entry"""
+    result = parser.process_entry(sample_entry, task_name="openai_humaneval")
+
+    assert isinstance(result, HumanEvalParseEntry)
+    assert result.task_id == "HumanEval/0"
+    assert result.entry_point == "add"
+    assert (
+        result.prompt == f"{parser._default_system_prompt}\n\n{sample_entry['prompt']}"
+    )
+    assert result.answer == sample_entry["canonical_solution"]
+    assert result.test == sample_entry["test"]
+    assert result.task_name == "openai_humaneval"
+
+
+def test_parser_initialization(parser):
+    """Test parser initialization and properties"""
+    assert parser._data_source == "openai/openai_humaneval"
+    assert parser._default_task == "openai_humaneval"
+    assert parser._task_names == ["openai_humaneval"]
+    assert (
+        parser.get_huggingface_link
+        == "https://huggingface.co/datasets/openai/openai_humaneval"
+    )
+
+
+@pytest.mark.integration
+def test_parser_load_and_parse(parser):
+    """Integration test for loading and parsing data"""
+    parser.load()
+    parser.parse()
+    parsed_data = parser.get_parsed_data
+
+    assert len(parsed_data) > 0
+    assert all(isinstance(entry, HumanEvalParseEntry) for entry in parsed_data)
+
+
+def test_get_current_task(parser, sample_entry):
+    """Test _get_current_task method"""
+    task = parser._get_current_task(sample_entry)
+    assert task == parser._default_task
+
+
+def test_plus_parser_initialization(plus_parser):
+    """Test HumanEvalDatasetPlusParser initialization and properties"""
+    assert plus_parser._data_source == "evalplus/humanevalplus"
+    assert plus_parser._default_task == "default"
+    assert plus_parser._task_names == ["default"]
+    assert (
+        plus_parser.get_huggingface_link
+        == "https://huggingface.co/datasets/evalplus/humanevalplus"
+    )
+
+
+def test_plus_process_entry(plus_parser, plus_sample_entry):
+    """Test processing of a single entry in HumanEvalDatasetPlusParser"""
+    result = plus_parser.process_entry(plus_sample_entry, task_name="default")
+
+    assert isinstance(result, HumanEvalParseEntry)
+    assert result.task_id == "HumanEval/0"
+    assert result.entry_point == "add"
+    assert (
+        result.prompt
+        == f"{plus_parser._default_system_prompt}\n\n{plus_sample_entry['prompt']}"
+    )
+    assert result.answer == plus_sample_entry["canonical_solution"]
+    assert result.test == plus_sample_entry["test"]
+    assert result.task_name == "default"
+
+
+@pytest.mark.integration
+def test_plus_parser_load_and_parse(plus_parser):
+    """Integration test for loading and parsing data with HumanEvalDatasetPlusParser"""
+    plus_parser.load()
+    plus_parser.parse()
+    parsed_data = plus_parser.get_parsed_data
+
+    assert len(parsed_data) > 0
+    assert all(isinstance(entry, HumanEvalParseEntry) for entry in parsed_data)
+
+
+def test_plus_get_current_task(plus_parser, plus_sample_entry):
+    """Test _get_current_task method for HumanEvalDatasetPlusParser"""
+    task = plus_parser._get_current_task(plus_sample_entry)
+    assert task == plus_parser._default_task
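
The two load-and-parse tests are tagged `@pytest.mark.integration`, so the suite can be run offline by deselecting them with `pytest tests/test_humaneval_parser.py -m "not integration"`; a plain `pytest tests/test_humaneval_parser.py` also exercises the integration tests that fetch both datasets from Hugging Face. This assumes the `integration` marker is registered in the project's pytest configuration.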