Spaces:
Paused
Paused
File size: 11,824 Bytes
f647629 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 |
# import pytest
# import re
# import inspect
# import ast
# import json
# import os
# from src.wandb_mcp_server.server import query_wandb_tool # Assuming src is importable
# # --- Configuration ---
# TARGET_ENTITY = "c-metrics"
# TARGET_PROJECT = "hallucination"
# # --- Helper Function to Extract Examples ---
# def extract_gql_examples_from_docstring(docstring):
# """Parses a docstring to extract GraphQL examples marked by specific delimiters."""
# examples = []
# # Regex to find the blocks delimited by <!-- WANDB_GQL_EXAMPLE_START/END -->
# # Restore original regex with backreference
# example_pattern = re.compile(
# r'<!-- WANDB_GQL_EXAMPLE_START name=(\w+) -->(.*?)<!-- WANDB_GQL_EXAMPLE_END name=\1 -->', # Restored \1
# re.DOTALL
# )
# # Regex to find graphql code blocks
# graphql_pattern = re.compile(r'\s*```graphql\s*\n(.*?)\n\s*```', re.DOTALL)
# # Regex to find python code blocks
# python_pattern = re.compile(r'\s*```python\s*\n(.*?)\n\s*```', re.DOTALL)
# # --- DEBUGGING ---
# print(f"\n>>> DEBUG: Inside extract_gql_examples_from_docstring")
# print(f" Attempting to find matches with pattern: {example_pattern.pattern}")
# print(f" in docstring of length {len(docstring)}")
# matches_found = 0
# # --- END DEBUGGING ---
# for match in example_pattern.finditer(docstring):
# # --- DEBUGGING ---
# matches_found += 1
# print(f" >>> Found match {matches_found}: name='{match.group(1)}'")
# # --- END DEBUGGING ---
# name = match.group(1)
# content = match.group(2)
# # --- DEBUGGING ---
# print(f" --- Content for '{name}' start ---")
# print(content)
# print(f" --- Content for '{name}' end ---")
# # --- END DEBUGGING ---
# graphql_match = graphql_pattern.search(content)
# python_match = python_pattern.search(content)
# if graphql_match and python_match:
# query = graphql_match.group(1).strip()
# # Extract the python code string, removing comments if necessary for exec
# variables_code_str = python_match.group(1).strip()
# # Blank out lines that begin with '#' in column 0 before exec; indented and trailing comments are left untouched
# variables_code_str = re.sub(r'^#.*$', '', variables_code_str, flags=re.MULTILINE).strip()
# # Attempt to parse the variable assignment part more robustly if it's simple
# try:
# # A simple approach might assume the last line is `variables = ...`
# # More robustly, find the assignment
# assignment_match = re.search(r'variables\s*=\s*(\{.*?\})', variables_code_str, re.DOTALL)
# variables_dict_code = assignment_match.group(1) if assignment_match else variables_code_str
# # --- DEBUGGING ---
# print(f" >>> Appending example: {name}")
# # --- END DEBUGGING ---
# examples.append({
# "name": name,
# "query": query,
# "variables_code": variables_dict_code # Store the code string for the dict/assignment
# })
# except Exception as e:
# print(f"Warning: Could not parse variables for example '{name}'. Error: {e}")
# # Decide if you want to skip or add with None/error marker
# # examples.append({"name": name, "query": query, "variables_code": None, "error": str(e)})
# # --- DEBUGGING ---
# print(f" Finished finditer loop. Total matches found: {matches_found}")
# print(f"<<< DEBUG: Exiting extract_gql_examples_from_docstring\n")
# # --- END DEBUGGING ---
# if not examples:
# raise ValueError("No examples found in docstring. Check delimiters and file content.")
# return examples
# # --- Pytest Fixture for Loading Examples ---
# @pytest.fixture(scope="session")
# def gql_examples():
# """Reads the target function's docstring and extracts GQL examples."""
# try:
# target_docstring = inspect.getdoc(query_wandb_tool)
# if not target_docstring:
# raise ImportError(f"Could not get docstring for query_wandb_tool.")
# # --- DEBUGGING: Print the retrieved docstring ---
# print("\n--- Retrieved Docstring by inspect.getdoc() ---")
# print(target_docstring)
# print("--- End of Retrieved Docstring ---\n")
# # --- END DEBUGGING ---
# extracted = extract_gql_examples_from_docstring(target_docstring)
# # Filter out examples where variables couldn't be parsed if the helper function indicates so
# valid_examples = [ex for ex in extracted if ex.get("variables_code")]
# if not valid_examples:
# raise ValueError("No valid examples with variable code found after parsing.")
# return valid_examples
# except Exception as e:
# # pytest will report this error during fixture setup
# pytest.fail(f"Failed to setup gql_examples fixture: {e}", pytrace=False)
# _example_names = []
# try:
# # Attempt to pre-load examples just to get names for parameterization
# # Note: This duplicates loading but simplifies parametrize setup
# # The fixture ensures the main test execution uses the proper setup/cached result.
# _target_docstring = inspect.getdoc(query_wandb_tool)
# if not _target_docstring:
# raise ImportError("Docstring not found at collection time.")
# _extracted_examples = extract_gql_examples_from_docstring(_target_docstring)
# _example_names = [ex["name"] for ex in _extracted_examples if ex.get("variables_code")]
# if not _example_names:
# raise ValueError("No valid example names found at collection time.")
# except Exception as e:
# print(f"Warning during test collection: Could not pre-load example names - {e}")
# # If collection fails to get names, the test function relying on the fixture
# # will fail later during setup/execution, which is acceptable.
# _example_names = ["SETUP_ERROR_DURING_COLLECTION"] # Provide a placeholder
# # --- Test Function ---
# # Apply the live_api marker
# @pytest.mark.live_api
# @pytest.mark.parametrize(
# "name", # Parametrize only by the example name
# _example_names
# )
# def test_wandb_gql_example(name, gql_examples): # Inject fixture here, remove query/variables_code
# """Runs a test for each extracted GraphQL example using live API calls."""
# if name == "SETUP_ERROR_DURING_COLLECTION":
# pytest.fail("Test collection could not determine example names. Check setup.")
# # Find the correct example data from the fixture result based on the parameterized name
# example_data = next((ex for ex in gql_examples if ex['name'] == name), None)
# if not example_data:
# pytest.fail(f"Could not find example data for name '{name}' in gql_examples fixture result.")
# # Use the data looked up from the fixture
# query = example_data["query"]
# variables_code = example_data["variables_code"]
# # The rest of the test logic remains largely the same...
# print(f"\nRunning test for example: {name}")
# print(f"Query:\n{query}")
# print(f"Variables Code:\n{variables_code}")
# variables = {}
# try:
# # Execute the Python code string to get the variables dictionary.
# # Reverting to exec as ast.literal_eval cannot handle nested strings required for JSON literals.
# local_scope = {'json': json} # Provide json module in the execution scope
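# # `variables_code` must contain the full `variables = {...}` assignment for the 'variables' check below to pass.
# # NOTE(review): the extractor stores only the `{...}` literal when its assignment regex matches, in which case exec defines nothing -- confirm.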
# exec(variables_code, local_scope)
# # Check if 'variables' was defined in the executed code
# if 'variables' not in local_scope:
# raise NameError("Executed code snippet did not define a 'variables' dictionary.")
# variables = local_scope['variables']
# if not isinstance(variables, dict):
# raise TypeError(f"Executed code defined 'variables', but it is not a dictionary. Got: {type(variables)}")
# print(f"Original Variables: {variables}")
# # Override entity and project for the test run
# # Override when the key is already present, or unconditionally for examples whose name ends in Info/Runs/Keys/Sampled/Details
# if 'entity' in variables or name.endswith('Info') or name.endswith('Runs') or name.endswith('Keys') or name.endswith('Sampled') or name.endswith('Details'):
# variables['entity'] = TARGET_ENTITY
# if 'project' in variables or name.endswith('Info') or name.endswith('Runs') or name.endswith('Keys') or name.endswith('Sampled') or name.endswith('Details'):
# variables['project'] = TARGET_PROJECT
# # Handle entityName/projectName variants if needed
# if 'entityName' in variables:
# variables['entityName'] = TARGET_ENTITY
# if 'projectName' in variables:
# variables['projectName'] = TARGET_PROJECT
# # Specific override for GetArtifactDetails test
# if name == 'GetArtifactDetails':
# # Use the specific artifact name provided by the user
# variables['artifactName'] = "c-metrics/hallucination/SmolLM2-360M-sft-hallu:v12"
# print(f" Overriding artifactName for {name} test.") # Debug print
# # Handle mutations which might not have standard entity/project vars
# if name == 'UpsertProject' or name == 'CreateProject':
# # Ensure the mutation targets the test entity, adjust name if needed
# variables['entity'] = TARGET_ENTITY
# variables['name'] = f"{TARGET_PROJECT}-test-upsert" # Avoid conflicts
# # Handle cases where limit might be needed but not in example vars (like mutations)
# # For mutations, the tool itself might not use max_items, depends on implementation
# # For queries, ensure a reasonable limit if not present? Or rely on tool default.
# # Let's rely on the tool's default `max_items` for now.
# print(f"Modified Variables: {variables}")
# except Exception as e:
# pytest.fail(f"Failed to execute or modify variables code for example '{name}': {e}\nCode: {variables_code}")
# # --- Make the Live API Call ---
# try:
# # Use default max_items and items_per_page from the tool's signature
# result = query_wandb_tool(query=query, variables=variables)
# print(f"API Result for {name}: {result}")
# # --- Assertions ---
# assert isinstance(result, dict), f"Expected result to be a dictionary, got {type(result)}"
# # Check specifically for the 'errors' key which indicates GraphQL level errors
# if 'errors' in result:
# # Sometimes 'errors' is present but None or empty list, check content
# error_content = result.get('errors')
# assert not error_content, f"GraphQL API returned errors for example '{name}': {error_content}"
# # Optional: Add more specific checks based on the query name if needed
# # e.g., if name == "GetProjectInfo": assert "project" in result.get("data", {})
# except Exception as e:
# pytest.fail(f"query_wandb_tool raised an exception for example '{name}': {e}")
# # Note: This test makes live calls to the W&B API. Before running, make sure that:
# # 1. You are logged in to W&B (e.g., via `wandb login`).
# # 2. The target project (c-metrics/hallucination) exists and is accessible.
# # 3. Network connectivity is available.
# # 4. You stay within API rate limits if running frequently.
# # To run only these tests: pytest -m live_api
# # To skip these tests: pytest -m "not live_api"
|