File size: 11,824 Bytes
f647629
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# import pytest
# import re
# import inspect
# import ast
# import json
# import os
# from src.wandb_mcp_server.server import query_wandb_tool # Assuming src is importable


# # --- Configuration ---
# TARGET_ENTITY = "c-metrics"
# TARGET_PROJECT = "hallucination"

# # --- Helper Function to Extract Examples ---

# def extract_gql_examples_from_docstring(docstring):
#     """Parses a docstring to extract GraphQL examples marked by specific delimiters."""
#     examples = []
#     # Regex to find the blocks delimited by <!-- WANDB_GQL_EXAMPLE_START/END --> markers.
#     # The \1 backreference requires the END marker's name to match the START marker's name.
#     example_pattern = re.compile(
#         r'<!-- WANDB_GQL_EXAMPLE_START name=(\w+) -->(.*?)<!-- WANDB_GQL_EXAMPLE_END name=\1 -->', # Restored \1
#         re.DOTALL
#     )
#     # Regex to find graphql code blocks
#     graphql_pattern = re.compile(r'\s*```graphql\s*\n(.*?)\n\s*```', re.DOTALL)
#     # Regex to find python code blocks
#     python_pattern = re.compile(r'\s*```python\s*\n(.*?)\n\s*```', re.DOTALL)

#     # --- DEBUGGING ---
#     print(f"\n>>> DEBUG: Inside extract_gql_examples_from_docstring")
#     print(f"    Attempting to find matches with pattern: {example_pattern.pattern}")
#     print(f"    in docstring of length {len(docstring)}")
#     matches_found = 0
#     # --- END DEBUGGING ---

#     for match in example_pattern.finditer(docstring):
#         # --- DEBUGGING ---
#         matches_found += 1
#         print(f"    >>> Found match {matches_found}: name='{match.group(1)}'")
#         # --- END DEBUGGING ---

#         name = match.group(1)
#         content = match.group(2)

#         # --- DEBUGGING ---
#         print(f"        --- Content for '{name}' start ---")
#         print(content)
#         print(f"        --- Content for '{name}' end ---")
#         # --- END DEBUGGING ---

#         graphql_match = graphql_pattern.search(content)
#         python_match = python_pattern.search(content)

#         if graphql_match and python_match:
#             query = graphql_match.group(1).strip()
#             # Extract the python code string, removing comments if necessary for exec
#             variables_code_str = python_match.group(1).strip()
#             # Blank out lines that begin with '#' at column 0 to avoid issues with exec
#             # (indented comments and trailing comments are left in place).
#             variables_code_str = re.sub(r'^#.*$', '', variables_code_str, flags=re.MULTILINE).strip()

#             # Attempt to parse the variable assignment part more robustly if it's simple
#             try:
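#                 # Look for a `variables = {...}` assignment and keep only the dict literal;
#                 # if no such assignment is found, fall back to the whole snippet.
#                 # NOTE(review): the non-greedy `\{.*?\}` stops at the first `}`, so nested dicts
#                 # are truncated; also, the test later exec()s this string and expects it to
#                 # define `variables`, which a bare dict literal does not do - confirm intent.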
#                 assignment_match = re.search(r'variables\s*=\s*(\{.*?\})', variables_code_str, re.DOTALL)
#                 variables_dict_code = assignment_match.group(1) if assignment_match else variables_code_str
#                 # --- DEBUGGING ---
#                 print(f"        >>> Appending example: {name}")
#                 # --- END DEBUGGING ---
#                 examples.append({
#                     "name": name,
#                     "query": query,
#                     "variables_code": variables_dict_code # Store the code string for the dict/assignment
#                 })
#             except Exception as e:
#                 print(f"Warning: Could not parse variables for example '{name}'. Error: {e}")
#                 # Decide if you want to skip or add with None/error marker
#                 # examples.append({"name": name, "query": query, "variables_code": None, "error": str(e)})

#     # --- DEBUGGING ---
#     print(f"    Finished finditer loop. Total matches found: {matches_found}")
#     print(f"<<< DEBUG: Exiting extract_gql_examples_from_docstring\n")
#     # --- END DEBUGGING ---

#     if not examples:
#          raise ValueError("No examples found in docstring. Check delimiters and file content.")

#     return examples

# # --- Pytest Fixture for Loading Examples ---
# @pytest.fixture(scope="session")
# def gql_examples():
#     """Reads the target function's docstring and extracts GQL examples."""
#     try:
#         target_docstring = inspect.getdoc(query_wandb_tool)
#         if not target_docstring:
#             raise ImportError(f"Could not get docstring for query_wandb_tool.")

#         # --- DEBUGGING: Print the retrieved docstring ---
#         print("\n--- Retrieved Docstring by inspect.getdoc() ---")
#         print(target_docstring)
#         print("--- End of Retrieved Docstring ---\n")
#         # --- END DEBUGGING ---

#         extracted = extract_gql_examples_from_docstring(target_docstring)
#         # Filter out examples where variables couldn't be parsed if the helper function indicates so
#         valid_examples = [ex for ex in extracted if ex.get("variables_code")]
#         if not valid_examples:
#              raise ValueError("No valid examples with variable code found after parsing.")
#         return valid_examples
#     except Exception as e:
#         # pytest will report this error during fixture setup
#         pytest.fail(f"Failed to setup gql_examples fixture: {e}", pytrace=False)

# _example_names = []
# try:
#     # Attempt to pre-load examples just to get names for parameterization
#     # Note: This duplicates loading but simplifies parametrize setup
#     # The fixture ensures the main test execution uses the proper setup/cached result.
#     _target_docstring = inspect.getdoc(query_wandb_tool)
#     if not _target_docstring:
#          raise ImportError("Docstring not found at collection time.")
#     _extracted_examples = extract_gql_examples_from_docstring(_target_docstring)
#     _example_names = [ex["name"] for ex in _extracted_examples if ex.get("variables_code")]
#     if not _example_names:
#          raise ValueError("No valid example names found at collection time.")
# except Exception as e:
#     print(f"Warning during test collection: Could not pre-load example names - {e}")
#     # If collection fails to get names, the test function relying on the fixture
#     # will fail later during setup/execution, which is acceptable.
#     _example_names = ["SETUP_ERROR_DURING_COLLECTION"] # Provide a placeholder


# # --- Test Function ---

# # Apply the live_api marker
# @pytest.mark.live_api
# @pytest.mark.parametrize(
#     "name", # Parametrize only by the example name
#     _example_names
# )
# def test_wandb_gql_example(name, gql_examples): # Inject fixture here, remove query/variables_code
#     """Runs a test for each extracted GraphQL example using live API calls."""

#     if name == "SETUP_ERROR_DURING_COLLECTION":
#         pytest.fail("Test collection could not determine example names. Check setup.")

#     # Find the correct example data from the fixture result based on the parameterized name
#     example_data = next((ex for ex in gql_examples if ex['name'] == name), None)
#     if not example_data:
#         pytest.fail(f"Could not find example data for name '{name}' in gql_examples fixture result.")

#     # Use the data looked up from the fixture
#     query = example_data["query"]
#     variables_code = example_data["variables_code"]

#     # The rest of the test logic remains largely the same...
#     print(f"\nRunning test for example: {name}")
#     print(f"Query:\n{query}")
#     print(f"Variables Code:\n{variables_code}")

#     variables = {}
#     try:
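#         # Execute the Python code string to get the variables dictionary.
#         # exec is used rather than ast.literal_eval, presumably because the snippets can contain
#         # expressions such as json.dumps(...) calls, which literal_eval rejects (it accepts literals only).
#         # NOTE(review): exec runs arbitrary code taken from the docstring; only use it on trusted sources.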
#         local_scope = {'json': json} # Provide json module in the execution scope
#         # The variable `variables_code` should contain the raw python code from the docstring block
#         exec(variables_code, local_scope)

#         # Check if 'variables' was defined in the executed code
#         if 'variables' not in local_scope:
#             raise NameError("Executed code snippet did not define a 'variables' dictionary.")

#         variables = local_scope['variables']

#         if not isinstance(variables, dict):
#              raise TypeError(f"Executed code defined 'variables', but it is not a dictionary. Got: {type(variables)}")

#         print(f"Original Variables: {variables}")

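#         # Override entity and project for the test run.
#         # They are set when the key is already present, or when the example name ends with
#         # Info/Runs/Keys/Sampled/Details (in which case the key is added even if it was absent).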
#         if 'entity' in variables or name.endswith('Info') or name.endswith('Runs') or name.endswith('Keys') or name.endswith('Sampled') or name.endswith('Details'):
#             variables['entity'] = TARGET_ENTITY
#         if 'project' in variables or name.endswith('Info') or name.endswith('Runs') or name.endswith('Keys') or name.endswith('Sampled') or name.endswith('Details'):
#             variables['project'] = TARGET_PROJECT
#         # Handle entityName/projectName variants if needed
#         if 'entityName' in variables:
#              variables['entityName'] = TARGET_ENTITY
#         if 'projectName' in variables:
#              variables['projectName'] = TARGET_PROJECT

#         # Specific override for GetArtifactDetails test
#         if name == 'GetArtifactDetails':
#              # Use the specific artifact name provided by the user
#              variables['artifactName'] = "c-metrics/hallucination/SmolLM2-360M-sft-hallu:v12"
#              print(f"    Overriding artifactName for {name} test.") # Debug print

#         # Handle mutations which might not have standard entity/project vars
#         if name == 'UpsertProject' or name == 'CreateProject':
#              # Ensure the mutation targets the test entity, adjust name if needed
#              variables['entity'] = TARGET_ENTITY
#              variables['name'] = f"{TARGET_PROJECT}-test-upsert" # Avoid conflicts


#         # No item limit is injected here, even when the example variables lack one.
#         # Mutations may not use max_items at all (that depends on the tool's implementation),
#         # and for queries we rely on the tool's default `max_items` for now instead of
#         # forcing a limit.

#         print(f"Modified Variables: {variables}")


#     except Exception as e:
#         pytest.fail(f"Failed to execute or modify variables code for example '{name}': {e}\nCode: {variables_code}")

#     # --- Make the Live API Call ---
#     try:
#         # Use default max_items and items_per_page from the tool's signature
#         result = query_wandb_tool(query=query, variables=variables)

#         print(f"API Result for {name}: {result}")

#         # --- Assertions ---
#         assert isinstance(result, dict), f"Expected result to be a dictionary, got {type(result)}"

#         # Check specifically for the 'errors' key which indicates GraphQL level errors
#         if 'errors' in result:
#              # Sometimes 'errors' is present but None or empty list, check content
#              error_content = result.get('errors')
#              assert not error_content, f"GraphQL API returned errors for example '{name}': {error_content}"

#         # Optional: Add more specific checks based on the query name if needed
#         # e.g., if name == "GetProjectInfo": assert "project" in result.get("data", {})

#     except Exception as e:
#         pytest.fail(f"query_wandb_tool raised an exception for example '{name}': {e}")

# # Note: This test makes live calls to the W&B API. Before running, ensure that:
# # 1. You are logged into W&B (e.g., via `wandb login`).
# # 2. The target project (c-metrics/hallucination) exists and is accessible.
# # 3. Network connectivity is available.
# # 4. Running the tests frequently will not exceed API rate limits.
# # To run only these tests: pytest -m live_api
# # To skip these tests: pytest -m "not live_api"