# eval_tests.yaml (repository generated from lambda-feedback/Evaluation-Function-Boilerplate)
# YAML supports comments: one reason to prefer it over JSON, which doesn't.
---
# Section: the response is written exactly like the answer.
title: Trivial comparisons
tests:
  - description: The response and answer are exactly the same, so the response should be considered correct.
    # Parameters can be given, but this field can be removed if none are needed
    params: {}
    answer: "A & B"
    response: "A & B"
    expected_result:
      is_correct: true
      # Ensure that the latex returned (for the preview) is correct.
      # Inside a double-quoted scalar "\\" is one escaped backslash,
      # so the expected value is: A \cdot B
      response_latex: "A \\cdot B"
  - description: Multi-character variable names are also supported
    answer: "A & Test"
    response: "A & Test"
    expected_result:
      is_correct: true
      response_latex: "A \\cdot \\mathrm{Test}"
# Tests can be divided into sections using '---'
---
# Section: the response differs textually from the answer.
title: Trivial comparisons, but not identical
tests:
  - description: Variables can appear in any order.
    answer: "A & B"
    response: "B & A"
    expected_result:
      is_correct: true
      # Any other fields returned by the evaluation function can be tested too, but this is optional.
  # The folded scalar body must be indented deeper than the `description` key.
  - description: > # Multi-line strings are possible for readability
      The wrong operator is used, so this is incorrect as
      the two expressions have different truth tables.
    answer: "A & B"
    response: "A | B"
    expected_result:
      is_correct: false
---
# This illustrates how sub-tests can be used to share the same answer and parameters
# for multiple tests.
title: More complex comparisons
tests:
  - description: XOR can be implemented using NAND or NOR
    # Shared by every entry under sub_tests below.
    answer: "A ^ B"
    sub_tests:
      - description: "Using NAND:"
        response: "~(~(A & ~(A & B)) & ~(B & ~(A & B)))"
        expected_result:
          is_correct: true
      - description: "Using NOR:"
        response: "~(~(~A | ~B) | ~(A | B))"
        expected_result:
          is_correct: true
  - description: "A few examples using de Morgan's laws:"
    # Shared by every entry under sub_tests below.
    answer: "A | (B & C)"
    # These sub-tests carry no description of their own.
    sub_tests:
      - response: "~(~A & ~(B & C))"
        expected_result:
          is_correct: true
      - response: "A | ~(~B | ~C)"
        expected_result:
          is_correct: true
  - description: You shouldn't see this one in the docs, but it is still a functional test
    answer: "A | B"
    response: "~(~A & ~B)"
    # Per the description above: still run as a test, but left out of the docs.
    exclude_from_docs: true
    expected_result:
      is_correct: true