Skip to content

Commit 46dbf42

Browse files
committed
feat: add interpreter and code runner with test-case support. The only failing test case is the mixed-scoping case in the scope tests, which may be the result of a Claude hallucination.
1 parent 5bc0a44 commit 46dbf42

15 files changed

Lines changed: 3327 additions & 2579 deletions
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
"""
2+
Fixed Code Runner - orchestrates parsing and execution.
3+
4+
FIXES:
5+
- Better error handling with specific exception types
6+
- Improved error messages
7+
- Handles ReturnException at global scope
8+
"""
9+
10+
from typing import List
11+
from ..schemas.input_schema import ExecutionTestCase
12+
from .interpreter import Interpreter, ReturnException
13+
from ..schemas.output_schema import CodeCorrectnessResult, TestCaseResult, ParseResult
14+
from ..parser.parser import PseudocodeParser
15+
16+
17+
class CodeRunner:
    """
    Orchestrates parsing and execution of pseudocode.

    Workflow:
    1. Parse source code to AST
    2. Execute AST with each test case
    3. Compare results with expected values
    4. Return aggregated results
    """

    def __init__(self, parser: PseudocodeParser, interpreter: Interpreter):
        """
        Store the collaborators used by :meth:`run`.

        Args:
            parser: Object whose ``parse(source_code)`` yields a ParseResult.
            interpreter: Object whose ``run(ast, initial_variables=...)``
                executes a parsed program.
        """
        self.parser = parser
        self.interpreter = interpreter

    def run(
        self,
        source_code: str,
        test_cases: List[ExecutionTestCase]
    ) -> CodeCorrectnessResult:
        """
        Parse and execute pseudocode with test cases.

        Args:
            source_code: Pseudocode to parse and execute
            test_cases: List of test cases with inputs and expected outputs

        Returns:
            CodeCorrectnessResult with parse status and execution results.
            When parsing fails (or yields a falsy AST) no test case is
            executed and ``is_correct`` is False.
        """

        # -----------------------------------
        # 1. Parse
        # -----------------------------------

        parse_result: ParseResult = self.parser.parse(source_code)

        if not parse_result.success or not parse_result.ast:
            return CodeCorrectnessResult(
                parse_success=False,
                parse_errors=parse_result.errors,
                parse_warnings=parse_result.warnings,
                normalized_code=parse_result.normalized_code,
                execution_results=[],
                is_correct=False,
                feedback="Parsing failed. Fix syntax errors before execution."
            )

        # -----------------------------------
        # 2. Execute Test Cases
        # -----------------------------------

        execution_results: List[TestCaseResult] = [
            self._run_test_case(parse_result.ast, test_case)
            for test_case in test_cases
        ]

        # -----------------------------------
        # 3. Aggregate Results
        # -----------------------------------

        all_passed = all(r.passed for r in execution_results)

        # Generate feedback
        if not execution_results:
            # No test cases: a successful parse alone is reported as correct.
            feedback = "Parsing successful. No test cases provided."
            is_correct = True
        elif all_passed:
            feedback = f"All {len(execution_results)} test case(s) passed! ✅"
            is_correct = True
        else:
            failed_count = sum(1 for r in execution_results if not r.passed)
            feedback = (f"{failed_count} of {len(execution_results)} test case(s) failed. "
                        f"Check execution results for details.")
            is_correct = False

        return CodeCorrectnessResult(
            parse_success=True,
            parse_errors=parse_result.errors,
            parse_warnings=parse_result.warnings,
            normalized_code=parse_result.normalized_code,
            execution_results=execution_results,
            is_correct=is_correct,
            feedback=feedback
        )

    def _run_test_case(self, ast, test_case: ExecutionTestCase) -> TestCaseResult:
        """
        Execute ``ast`` for one test case and turn the outcome into a result.

        Any exception raised while running or checking the test case is
        converted into a failed TestCaseResult, so one bad test case never
        aborts the remaining ones.
        """
        try:
            result = self.interpreter.run(
                ast,
                initial_variables=test_case.initial_variables
            )
            mismatches = self._find_mismatches(result, test_case)
            return self._build_result(
                test_case,
                actual_output=result,
                passed=not mismatches,
                error_message="; ".join(mismatches) if mismatches else None
            )
        # ReturnException is listed explicitly so it is still caught even if
        # it does not derive from Exception.
        except (ReturnException, Exception) as exc:
            return self._build_result(
                test_case,
                actual_output=None,
                passed=False,
                error_message=self._describe_error(exc)
            )

    @staticmethod
    def _find_mismatches(result, test_case: ExecutionTestCase) -> List[str]:
        """
        Compare an interpreter result with the test case's expectations.

        An expectation set to None is skipped. Returns one message per
        mismatch (variables first, then output); an empty list means the
        test case passed.
        """
        mismatches: List[str] = []

        if test_case.expected_variables is not None:
            if result["variables"] != test_case.expected_variables:
                mismatches.append(
                    f"Variables mismatch: expected {test_case.expected_variables}, "
                    f"got {result['variables']}"
                )

        if test_case.expected_output is not None:
            if result["output"] != test_case.expected_output:
                mismatches.append(
                    f"Output mismatch: expected {test_case.expected_output}, "
                    f"got {result['output']}"
                )

        return mismatches

    @staticmethod
    def _build_result(test_case: ExecutionTestCase, actual_output, passed: bool,
                      error_message) -> TestCaseResult:
        """Build the TestCaseResult for ``test_case`` with the given outcome."""
        return TestCaseResult(
            input_data=test_case.initial_variables,
            expected_output={
                "variables": test_case.expected_variables,
                "output": test_case.expected_output
            },
            actual_output=actual_output,
            passed=passed,
            error_message=error_message
        )

    @staticmethod
    def _describe_error(exc) -> str:
        """
        Map an exception raised during execution to a user-facing message.

        The checks run from most to least specific, mirroring the precedence
        of a chain of ``except`` clauses; anything unrecognised falls through
        to a generic "<ExceptionType>: <message>" form.
        """
        if isinstance(exc, ReturnException):
            # Return at global scope - shouldn't happen
            return f"Unexpected return statement (returned {exc.value})"
        if isinstance(exc, NameError):
            return f"Variable or function not defined: {exc!s}"
        if isinstance(exc, ZeroDivisionError):
            return "Division by zero"
        if isinstance(exc, IndexError):
            return f"Array index error: {exc!s}"
        if isinstance(exc, TypeError):
            return f"Type error: {exc!s}"
        if isinstance(exc, RuntimeError):
            return f"Runtime error: {exc!s}"
        # Catch-all for unexpected errors
        return f"{type(exc).__name__}: {exc!s}"

0 commit comments

Comments
 (0)