-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluate.ts
More file actions
43 lines (41 loc) · 1.34 KB
/
evaluate.ts
File metadata and controls
43 lines (41 loc) · 1.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/**
* Programmatic API Example
*
* Uses evaluate() from @agentv/core to run evaluations as a library.
* No YAML needed — tests defined inline with full type safety.
*
* Run: bun run evaluate.ts
* (Uses 'default' target from .agentv/targets.yaml and .env credentials)
*/
import { evaluate } from '@agentv/core';
// Run two inline test cases through evaluate() and keep the resolved value;
// `results` and `summary` are pulled out of it below.
const evaluation = await evaluate({
  tests: [
    // Plain-string prompt with a single 'contains' assertion for 'Hello'.
    {
      id: 'greeting',
      input: 'Say hello and introduce yourself briefly.',
      expectedOutput: "Hello! I'm an AI assistant here to help you.",
      assert: [{ type: 'contains', value: 'Hello' }],
    },
    // Message-array prompt (system + user) carrying two assertions:
    // an 'is-json' entry flagged `required: true` and a 'contains' entry for 'ok'.
    {
      id: 'json-output',
      input: [
        { role: 'system', content: 'Respond only with valid JSON. No markdown.' },
        { role: 'user', content: 'Return a JSON object with a "status" field set to "ok".' },
      ],
      expectedOutput: '{"status": "ok"}',
      assert: [
        { type: 'is-json', required: true },
        { type: 'contains', value: 'ok' },
      ],
    },
  ],
  // Log the test id and its score formatted to two decimal places.
  onResult: ({ testId, score }) => {
    console.log(` ${testId}: score=${score.toFixed(2)}`);
  },
});
const { results, summary } = evaluation;
// Print the aggregate figures from the run summary, one per line.
console.log('\n--- Summary ---');
// Read the fields only after the banner so a missing summary fails at the same point as before.
const { total, passed, failed, meanScore, durationMs } = summary;
console.log(`Total: ${total}`);
console.log(`Passed: ${passed}`);
console.log(`Failed: ${failed}`);
// Mean score is shown with two decimal places.
console.log(`Mean score: ${meanScore.toFixed(2)}`);
console.log(`Duration: ${durationMs}ms`);