# NOTE(review): every line of this module is commented out, so nothing here is
# collected or executed by pytest.

# import pytest
# import re
# import inspect
# import ast
# import json
# import os
# from src.wandb_mcp_server.server import query_wandb_tool  # Assuming src is importable

# # --- Configuration ---
# TARGET_ENTITY = "c-metrics"
# TARGET_PROJECT = "hallucination"

# # --- Helper Function to Extract Examples ---
# def extract_gql_examples_from_docstring(docstring):
#     """Parses a docstring to extract GraphQL examples marked by specific delimiters."""
#     examples = []
#     # Regex to find the blocks delimited by
#     # NOTE(review): the delimiter markers appear to be missing from the comment
#     # above and from the pattern below: the loop reads match.group(2) and the
#     # inline comment mentions a \1 backreference, but only one group is shown.
#     # Restore the real delimiters before re-enabling this module.
#     # Restore original regex with backreference
#     example_pattern = re.compile(
#         r'(.*?)',  # Restored \1
#         re.DOTALL
#     )
#     # Regex to find graphql code blocks
#     graphql_pattern = re.compile(r'\s*```graphql\s*\n(.*?)\n\s*```', re.DOTALL)
#     # Regex to find python code blocks
#     python_pattern = re.compile(r'\s*```python\s*\n(.*?)\n\s*```', re.DOTALL)

#     # --- DEBUGGING ---
#     print(f"\n>>> DEBUG: Inside extract_gql_examples_from_docstring")
#     print(f" Attempting to find matches with pattern: {example_pattern.pattern}")
#     print(f" in docstring of length {len(docstring)}")
#     matches_found = 0
#     # --- END DEBUGGING ---

#     for match in example_pattern.finditer(docstring):
#         # --- DEBUGGING ---
#         matches_found += 1
#         print(f" >>> Found match {matches_found}: name='{match.group(1)}'")
#         # --- END DEBUGGING ---

#         name = match.group(1)
#         content = match.group(2)

#         # --- DEBUGGING ---
#         print(f" --- Content for '{name}' start ---")
#         print(content)
#         print(f" --- Content for '{name}' end ---")
#         # --- END DEBUGGING ---

#         graphql_match = graphql_pattern.search(content)
#         python_match = python_pattern.search(content)

#         if graphql_match and python_match:
#             query = graphql_match.group(1).strip()
#             # Extract the python code string, removing comments if necessary for exec
#             variables_code_str = python_match.group(1).strip()
#             # Remove comments starting with # to avoid issues with exec
#             variables_code_str = re.sub(r'^#.*$', '', variables_code_str, flags=re.MULTILINE).strip()

#             # Attempt to parse the variable assignment part more robustly if it's simple
#             try:
#                 # A simple approach might assume the last line is `variables = ...`
#                 # More robustly, find the assignment
#                 assignment_match = re.search(r'variables\s*=\s*(\{.*?\})', variables_code_str, re.DOTALL)
#                 variables_dict_code = assignment_match.group(1) if assignment_match else variables_code_str

#                 # --- DEBUGGING ---
#                 print(f" >>> Appending example: {name}")
#                 # --- END DEBUGGING ---
#                 examples.append({
#                     "name": name,
#                     "query": query,
#                     "variables_code": variables_dict_code  # Store the code string for the dict/assignment
#                 })
#             except Exception as e:
#                 print(f"Warning: Could not parse variables for example '{name}'. Error: {e}")
#                 # Decide if you want to skip or add with None/error marker
#                 # examples.append({"name": name, "query": query, "variables_code": None, "error": str(e)})

#     # --- DEBUGGING ---
#     print(f" Finished finditer loop. Total matches found: {matches_found}")
#     print(f"<<< DEBUG: Exiting extract_gql_examples_from_docstring\n")
#     # --- END DEBUGGING ---

#     if not examples:
#         raise ValueError("No examples found in docstring. Check delimiters and file content.")
#     return examples

# # --- Pytest Fixture for Loading Examples ---
# @pytest.fixture(scope="session")
# def gql_examples():
#     """Reads the target function's docstring and extracts GQL examples."""
#     try:
#         target_docstring = inspect.getdoc(query_wandb_tool)
#         if not target_docstring:
#             raise ImportError(f"Could not get docstring for query_wandb_tool.")

#         # --- DEBUGGING: Print the retrieved docstring ---
#         print("\n--- Retrieved Docstring by inspect.getdoc() ---")
#         print(target_docstring)
#         print("--- End of Retrieved Docstring ---\n")
#         # --- END DEBUGGING ---

#         extracted = extract_gql_examples_from_docstring(target_docstring)
#         # Filter out examples where variables couldn't be parsed if the helper function indicates so
#         valid_examples = [ex for ex in extracted if ex.get("variables_code")]
#         if not valid_examples:
#             raise ValueError("No valid examples with variable code found after parsing.")
#         return valid_examples
#     except Exception as e:
#         # pytest will report this error during fixture setup
#         pytest.fail(f"Failed to setup gql_examples fixture: {e}", pytrace=False)

# _example_names = []
# try:
#     # Attempt to pre-load examples just to get names for parameterization
#     # Note: This duplicates loading but simplifies parametrize setup
#     # The fixture ensures the main test execution uses the proper setup/cached result.
#     _target_docstring = inspect.getdoc(query_wandb_tool)
#     if not _target_docstring:
#         raise ImportError("Docstring not found at collection time.")
#     _extracted_examples = extract_gql_examples_from_docstring(_target_docstring)
#     _example_names = [ex["name"] for ex in _extracted_examples if ex.get("variables_code")]
#     if not _example_names:
#         raise ValueError("No valid example names found at collection time.")
# except Exception as e:
#     print(f"Warning during test collection: Could not pre-load example names - {e}")
#     # If collection fails to get names, the test function relying on the fixture
#     # will fail later during setup/execution, which is acceptable.
#     _example_names = ["SETUP_ERROR_DURING_COLLECTION"]  # Provide a placeholder

# # --- Test Function ---
# # Apply the live_api marker
# @pytest.mark.live_api
# @pytest.mark.parametrize(
#     "name",  # Parametrize only by the example name
#     _example_names
# )
# def test_wandb_gql_example(name, gql_examples):  # Inject fixture here, remove query/variables_code
#     """Runs a test for each extracted GraphQL example using live API calls."""
#     if name == "SETUP_ERROR_DURING_COLLECTION":
#         pytest.fail("Test collection could not determine example names. Check setup.")

#     # Find the correct example data from the fixture result based on the parameterized name
#     example_data = next((ex for ex in gql_examples if ex['name'] == name), None)
#     if not example_data:
#         pytest.fail(f"Could not find example data for name '{name}' in gql_examples fixture result.")

#     # Use the data looked up from the fixture
#     query = example_data["query"]
#     variables_code = example_data["variables_code"]

#     # The rest of the test logic remains largely the same...
#     print(f"\nRunning test for example: {name}")
#     print(f"Query:\n{query}")
#     print(f"Variables Code:\n{variables_code}")

#     variables = {}
#     try:
#         # Execute the Python code string to get the variables dictionary.
#         # Reverting to exec as ast.literal_eval cannot handle nested strings required for JSON literals.
#         local_scope = {'json': json}  # Provide json module in the execution scope
#         # The variable `variables_code` should contain the raw python code from the docstring block
#         exec(variables_code, local_scope)

#         # Check if 'variables' was defined in the executed code
#         if 'variables' not in local_scope:
#             raise NameError("Executed code snippet did not define a 'variables' dictionary.")

#         variables = local_scope['variables']
#         if not isinstance(variables, dict):
#             raise TypeError(f"Executed code defined 'variables', but it is not a dictionary. Got: {type(variables)}")

#         print(f"Original Variables: {variables}")

#         # Override entity and project for the test run
#         # Check if the keys exist before assigning, especially for mutations
#         if 'entity' in variables or name.endswith('Info') or name.endswith('Runs') or name.endswith('Keys') or name.endswith('Sampled') or name.endswith('Details'):
#             variables['entity'] = TARGET_ENTITY
#         if 'project' in variables or name.endswith('Info') or name.endswith('Runs') or name.endswith('Keys') or name.endswith('Sampled') or name.endswith('Details'):
#             variables['project'] = TARGET_PROJECT
#         # Handle entityName/projectName variants if needed
#         if 'entityName' in variables:
#             variables['entityName'] = TARGET_ENTITY
#         if 'projectName' in variables:
#             variables['projectName'] = TARGET_PROJECT

#         # Specific override for GetArtifactDetails test
#         if name == 'GetArtifactDetails':
#             # Use the specific artifact name provided by the user
#             variables['artifactName'] = "c-metrics/hallucination/SmolLM2-360M-sft-hallu:v12"
#             print(f" Overriding artifactName for {name} test.")  # Debug print

#         # Handle mutations which might not have standard entity/project vars
#         if name == 'UpsertProject' or name == 'CreateProject':
#             # Ensure the mutation targets the test entity, adjust name if needed
#             variables['entity'] = TARGET_ENTITY
#             variables['name'] = f"{TARGET_PROJECT}-test-upsert"  # Avoid conflicts

#         # Handle cases where limit might be needed but not in example vars (like mutations)
#         # For mutations, the tool itself might not use max_items, depends on implementation
#         # For queries, ensure a reasonable limit if not present? Or rely on tool default.
#         # Let's rely on the tool's default `max_items` for now.

#         print(f"Modified Variables: {variables}")

#     except Exception as e:
#         pytest.fail(f"Failed to execute or modify variables code for example '{name}': {e}\nCode: {variables_code}")

#     # --- Make the Live API Call ---
#     try:
#         # Use default max_items and items_per_page from the tool's signature
#         result = query_wandb_tool(query=query, variables=variables)
#         print(f"API Result for {name}: {result}")

#         # --- Assertions ---
#         assert isinstance(result, dict), f"Expected result to be a dictionary, got {type(result)}"
#         # Check specifically for the 'errors' key which indicates GraphQL level errors
#         if 'errors' in result:
#             # Sometimes 'errors' is present but None or empty list, check content
#             error_content = result.get('errors')
#             assert not error_content, f"GraphQL API returned errors for example '{name}': {error_content}"

#         # Optional: Add more specific checks based on the query name if needed
#         # e.g., if name == "GetProjectInfo": assert "project" in result.get("data", {})

#     except Exception as e:
#         pytest.fail(f"query_wandb_tool raised an exception for example '{name}': {e}")

# # Note: This test makes live calls to the W&B API. Ensure:
# # 1. You are logged into W&B (e.g., via `wandb login`).
# # 2. The target project (c-metrics/hallucination) exists and is accessible.
# # 3. Network connectivity is available.
# # 4. Be mindful of API rate limits if running frequently.
# # To run only these tests: pytest -m live_api
# # To skip these tests: pytest -m "not live_api"