Spaces:

JeffYang52415
/

LLMEval-Dataset-Parser

Running

App Files Files Community

JeffYang52415 committed on Dec 28, 2024

Commit

b65e855

unverified ·

1 Parent(s): 289c905

feat: add math parser

Browse files

Files changed (4) hide show

.pre-commit-config.yaml +0 -4
llmdataparser/math_parser.py +108 -0
pyproject.toml +0 -3
tests/test_math_parser.py +200 -0

.pre-commit-config.yaml CHANGED Viewed

@@ -65,14 +65,10 @@ repos:
       - id: prettier
         types_or: [markdown, yaml]
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    # Ruff version.
     rev: v0.4.4
     hooks:
-      # Run the linter.
       - id: ruff
         args: [--fix]
-      # Run the formatter.
-      - id: ruff-format
   - repo: https://github.com/kynan/nbstripout
     rev: 0.5.0 # use the latest version
     hooks:

       - id: prettier
         types_or: [markdown, yaml]
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.4.4
     hooks:
       - id: ruff
         args: [--fix]
   - repo: https://github.com/kynan/nbstripout
     rev: 0.5.0 # use the latest version
     hooks:

llmdataparser/math_parser.py ADDED Viewed

	@@ -0,0 +1,108 @@

+from dataclasses import dataclass
+from typing import Any, ClassVar
+from llmdataparser.base_parser import HuggingFaceDatasetParser, HuggingFaceParseEntry
@dataclass(frozen=True, kw_only=True, slots=True)
class MATHParseEntry(HuggingFaceParseEntry):
    """Frozen, keyword-only record for one parsed MATH problem.

    Adds the three fields below to whatever ``HuggingFaceParseEntry`` declares.
    """

    # Level label of the problem, e.g. "Level 3".
    level: str
    # Task the entry is attributed to, e.g. "algebra".
    task_name: str
    # Solution text of the problem.
    solution: str

    @classmethod
    def create(
        cls,
        prompt: str,
        answer: str,
        raw_question: str,
        raw_answer: str,
        level: str,
        task_name: str,
        solution: str,
    ) -> "MATHParseEntry":
        """Build an entry from explicit values.

        Every argument is passed through unchanged, by keyword, to the
        dataclass constructor; no validation is performed here.

        Returns:
            A new ``MATHParseEntry`` holding the given values.
        """
        field_values = {
            "prompt": prompt,
            "answer": answer,
            "raw_question": raw_question,
            "raw_answer": raw_answer,
            "level": level,
            "task_name": task_name,
            "solution": solution,
        }
        return cls(**field_values)
class MATHDatasetParser(HuggingFaceDatasetParser[MATHParseEntry]):
    """Dataset parser that converts MATH rows into ``MATHParseEntry`` objects."""

    _data_source: ClassVar[str] = "lighteval/MATH"
    # NOTE(review): confirm these names match the configurations actually
    # published for the data source above.
    _task_names: ClassVar[list[str]] = [
        "algebra",
        "geometry",
        "calculus",
        "prealgebra",
        "intermediate_algebra",
        "number_theory",
        "precalculus",
        "all",
    ]
    _default_task: ClassVar[str] = "all"
    _default_system_prompt: ClassVar[str] = (
        "Solve the following mathematics problem step by step:"
    )
    # The only level labels accepted as-is: "Level 1" through "Level 5".
    _valid_levels: ClassVar[set[str]] = {
        "Level 1",
        "Level 2",
        "Level 3",
        "Level 4",
        "Level 5",
    }

    def _get_task_from_entry(self, data_entry: dict[str, Any]) -> str:
        """Resolve the task name for a single row.

        Args:
            data_entry: Raw dataset row; its ``"type"`` value is consulted.

        Returns:
            The row's ``"type"`` when it is truthy and listed in
            ``_task_names``; otherwise ``self._current_task``, or
            ``_default_task`` when the current task is falsy.
        """
        candidate = data_entry.get("type")
        if not candidate or candidate not in self._task_names:
            return self._current_task or self._default_task
        return candidate

    def process_entry(
        self, row: dict[str, Any], task_name: str | None = None, **kwargs: Any
    ) -> MATHParseEntry:
        """Convert one raw MATH row into a ``MATHParseEntry``.

        Args:
            row: Raw dataset row. ``"problem"`` and ``"solution"`` are read
                by subscript; ``"level"`` is optional.
            task_name: Task to record on the entry. When falsy, the task is
                obtained from ``self._get_current_task(row)``.
            **kwargs: Accepted but not used by this implementation.

        Returns:
            The parsed entry. ``answer``, ``raw_answer`` and ``solution`` all
            carry the row's ``"solution"`` text.

        Raises:
            KeyError: If ``row`` has no ``"problem"`` or ``"solution"`` key.
        """
        # NOTE(review): ``_get_current_task`` is not defined in this class —
        # presumably inherited from the base parser; verify that it consults
        # ``_get_task_from_entry`` if per-row types are meant to be honoured.
        resolved_task = task_name or self._get_current_task(row)

        # Any level outside the accepted labels (including a missing one) is
        # recorded as "Unknown".
        raw_level = row.get("level")
        normalized_level = raw_level if raw_level in self._valid_levels else "Unknown"

        # NOTE(review): ``_system_prompt`` is not set in this class —
        # presumably provided by the base parser; confirm.
        full_prompt = f"{self._system_prompt}\n{row['problem']}"
        solution_text = row["solution"]

        return MATHParseEntry.create(
            prompt=full_prompt,
            answer=solution_text,
            raw_question=row["problem"],
            raw_answer=solution_text,
            level=normalized_level,
            task_name=resolved_task,
            solution=solution_text,
        )
if __name__ == "__main__":
    # Demo run: load the dataset, parse it, then show the first parsed entry.
    parser = MATHDatasetParser()
    parser.load()
    parser.parse()

    # Read without a call, exactly as before — presumably a property; verify.
    parsed_data = parser.get_parsed_data

    if parsed_data:
        first_entry = parsed_data[0]
        print("\nExample parsed entry:")
        # (label, attribute) pairs, printed in this order.
        for label, attribute in (
            ("Task", "task_name"),
            ("Level", "level"),
            ("Question", "raw_question"),
            ("Solution", "solution"),
        ):
            print(f"{label}: {getattr(first_entry, attribute)}")

pyproject.toml CHANGED Viewed

@@ -49,11 +49,8 @@ profile = "black"
 line_length = 88
 known_first_party = ["llmdataparser"]
 [tool.ruff]
 line-length = 88
-select = ["E", "F"]  # or specify checks explicitly without E501
-ignore = ["E501"]
 [tool.ruff.lint]
 ignore = ["E501"]

 line_length = 88
 known_first_party = ["llmdataparser"]
 [tool.ruff]
 line-length = 88
 [tool.ruff.lint]
 ignore = ["E501"]

tests/test_math_parser.py ADDED Viewed

	@@ -0,0 +1,200 @@

+import pytest
+from llmdataparser.math_parser import MATHDatasetParser, MATHParseEntry
@pytest.fixture
def math_parser():
    """Provide a freshly constructed ``MATHDatasetParser`` for each test."""
    parser = MATHDatasetParser()
    return parser
@pytest.fixture
def loaded_math_parser(math_parser):
    """Provide the parser after ``load`` was called for the algebra task, test split."""
    load_options = {"task_name": "algebra", "split": "test"}
    math_parser.load(**load_options)
    return math_parser
@pytest.fixture
def sample_math_entries():
    """Provide three hand-written MATH-style rows (algebra, geometry, calculus)."""
    algebra_row = {
        "problem": "Solve for x: 2x + 4 = 10",
        "level": "Level 3",
        "solution": "Let's solve step by step:\n1) Subtract 4 from both sides: 2x = 6\n2) Divide both sides by 2\n\nTherefore, x = 3",
        "type": "algebra",
    }
    geometry_row = {
        "problem": "Find the area of a circle with radius 5 units.",
        "level": "Level 2",
        "solution": "Area = πr²\nArea = π(5)²\nArea = 25π square units",
        "type": "geometry",
    }
    calculus_row = {
        "problem": "What is the limit of (x²-1)/(x-1) as x approaches 1?",
        "level": "Level 4",
        "solution": "Using L'Hôpital's rule:\nlim(x→1) (x²-1)/(x-1) = lim(x→1) (2x)/(1) = 2",
        "type": "calculus",
    }
    return [algebra_row, geometry_row, calculus_row]
def test_math_parse_entry_creation_valid():
    """``MATHParseEntry.create`` stores every supplied value on the entry."""
    expected_fields = {
        "prompt": "Test prompt",
        "answer": "Test answer",
        "raw_question": "Test question",
        "raw_answer": "Test solution",
        "level": "Level 5",
        "task_name": "algebra",
        "solution": "Test solution",
    }
    entry = MATHParseEntry.create(**expected_fields)

    assert isinstance(entry, MATHParseEntry)
    # Each attribute must equal the value passed for it.
    for field_name, expected_value in expected_fields.items():
        assert getattr(entry, field_name) == expected_value
@pytest.mark.parametrize(
    "test_case",
    [
        {
            "problem": "Solve for x: 2x + 4 = 10",
            "level": "Level 3",
            "solution": "x = 3",
            "type": "algebra",
        },
        {
            "problem": "Find the derivative of f(x) = x²",
            "level": "Level 4",
            "solution": "f'(x) = 2x",
            "type": "calculus",
        },
    ],
)
def test_process_entry(math_parser, test_case):
    """``process_entry`` maps a raw row onto the matching entry fields."""
    problem = test_case["problem"]
    solution = test_case["solution"]
    task = test_case["type"]

    parsed = math_parser.process_entry(test_case, task_name=task)

    assert isinstance(parsed, MATHParseEntry)
    # Prompt is the default system prompt, a newline, then the problem text.
    expected_prompt = f"{math_parser._default_system_prompt}\n{problem}"
    assert parsed.prompt == expected_prompt
    assert parsed.answer == solution
    assert parsed.raw_question == problem
    assert parsed.raw_answer == solution
    assert parsed.level == test_case["level"]
    assert parsed.task_name == task
    assert parsed.solution == solution
def test_math_parser_initialization(math_parser):
    """A new parser exposes the expected tasks, data source, link and prompt."""
    names = math_parser.task_names
    assert isinstance(names, list)
    assert len(names) == 8

    assert math_parser._data_source == "lighteval/MATH"
    assert math_parser._default_task == "all"

    for expected_task in ("algebra", "geometry"):
        assert expected_task in math_parser.task_names

    expected_link = "https://huggingface.co/datasets/lighteval/MATH"
    assert math_parser.get_huggingface_link == expected_link

    lowered_prompt = math_parser._default_system_prompt.lower()
    assert "mathematics problem" in lowered_prompt
def test_get_current_task(math_parser):
    """Task resolution: a known row type wins, otherwise the current task is used."""
    # A row whose type is a known task resolves to that type.
    typed_row = {"type": "algebra"}
    assert math_parser._get_current_task(typed_row) == "algebra"

    # A row without a type resolves to the parser's current task.
    untyped_row = {}
    math_parser._current_task = "geometry"
    assert math_parser._get_current_task(untyped_row) == "geometry"

    # A row with an unrecognised type also resolves to the current task.
    unknown_type_row = {"type": "invalid_type"}
    math_parser._current_task = "algebra"
    assert math_parser._get_current_task(unknown_type_row) == "algebra"
def test_valid_levels(math_parser):
    """Levels "Level 1" through "Level 5" are kept unchanged on the entry."""
    accepted_labels = [f"Level {number}" for number in range(1, 6)]
    for label in accepted_labels:
        row = {
            "problem": "Test problem",
            "level": label,
            "solution": "Test solution",
            "type": "algebra",
        }
        parsed = math_parser.process_entry(row, task_name="algebra")
        assert parsed.level == label
@pytest.mark.parametrize(
    "invalid_level",
    [
        "Level 0",  # Below the accepted range
        "Level 6",  # Above the accepted range
        "Invalid",  # Not a level label at all
        None,  # No level given
        "",  # Empty string
        "level 1",  # Lower-case "l"
    ],
)
def test_invalid_level_handling(math_parser, invalid_level):
    """Any level outside the accepted labels is recorded as "Unknown"."""
    row = dict(
        problem="Test problem",
        level=invalid_level,
        solution="Test solution",
        type="algebra",
    )
    parsed = math_parser.process_entry(row, task_name="algebra")
    assert parsed.level == "Unknown"
@pytest.mark.integration
def test_load_dataset(loaded_math_parser):
    """After loading, raw data, split names and current task are all set."""
    parser = loaded_math_parser
    assert parser.raw_data is not None
    assert parser.split_names == ["test"]
    assert parser._current_task == "algebra"
@pytest.mark.integration
def test_parser_string_representation(loaded_math_parser):
    """The string form of a loaded parser names its class, source, task and state.

    Marked as an integration test because the ``loaded_math_parser`` fixture
    calls ``load(...)`` on the parser before this test body runs.
    """
    repr_str = str(loaded_math_parser)
    # Each fragment must appear somewhere in the rendered string.
    for fragment in ("MATHDatasetParser", "lighteval/MATH", "algebra", "loaded"):
        assert fragment in repr_str
@pytest.mark.integration
def test_different_splits_parsing(math_parser):
    """The test and train splits both parse to non-empty results of different sizes."""
    entry_counts = {}
    # Load and force-parse each split in turn, recording how many entries result.
    for split in ("test", "train"):
        math_parser.load(task_name="algebra", split=split)
        math_parser.parse(split_names=split, force=True)
        entry_counts[split] = len(math_parser.get_parsed_data)

    assert entry_counts["test"] > 0
    assert entry_counts["train"] > 0
    assert entry_counts["train"] != entry_counts["test"]