Skip to content

Commit dccbcd4

Browse files
authored
Merge pull request #533 from Titas-Ghosh/feat/validate-json-output
cli: add JSON output for concore validate
2 parents 26c81b4 + 96b1330 commit dccbcd4

3 files changed

Lines changed: 230 additions & 9 deletions

File tree

concore_cli/cli.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,22 @@ def build(workflow_file, source, output, type, auto_build, compose):
9595
@cli.command()
9696
@click.argument("workflow_file", type=click.Path(exists=True))
9797
@click.option("--source", "-s", default="src", help="Source directory")
98-
def validate(workflow_file, source):
98+
@click.option(
99+
"--format",
100+
"output_format",
101+
default="text",
102+
type=click.Choice(["text", "json"]),
103+
help="Validation output format",
104+
)
105+
def validate(workflow_file, source, output_format):
99106
"""Validate a workflow file"""
100107
try:
101-
ok = validate_workflow(workflow_file, source, console)
108+
ok = validate_workflow(
109+
workflow_file,
110+
source,
111+
console,
112+
output_format=output_format,
113+
)
102114
if not ok:
103115
sys.exit(1)
104116
except Exception as e:

concore_cli/commands/validate.py

Lines changed: 178 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,167 @@
1+
import json
12
from pathlib import Path
23
from bs4 import BeautifulSoup
34
from rich.panel import Panel
45
import re
56
import xml.etree.ElementTree as ET
67

78

8-
def validate_workflow(workflow_file, source_dir, console):
9+
def _classify_message(message, bucket_name):
10+
if bucket_name == "info":
11+
if message.startswith("Found ") and "node(s)" in message:
12+
return {"info_type": "node_count"}
13+
if message.startswith("Found ") and "edge(s)" in message:
14+
return {"info_type": "edge_count"}
15+
if message.startswith("ZMQ-based edges:"):
16+
return {"info_type": "zmq_edges"}
17+
if message.startswith("File-based edges:"):
18+
return {"info_type": "file_edges"}
19+
return {"info_type": "info"}
20+
21+
if message == "File is empty":
22+
return {"error_type": "empty_file"}
23+
if message.startswith("Invalid XML:"):
24+
return {"error_type": "invalid_xml"}
25+
if message == "Not a valid GraphML file - missing <graphml> root element":
26+
return {"error_type": "invalid_graphml"}
27+
if message == "Missing <graph> element":
28+
return {"error_type": "missing_graph_element"}
29+
if message == "Graph missing required 'edgedefault' attribute":
30+
return {"error_type": "missing_edgedefault"}
31+
if message.startswith("Invalid edgedefault value"):
32+
return {"error_type": "invalid_edgedefault"}
33+
if message == "No nodes found in workflow":
34+
return {"error_type": "no_nodes"}
35+
if message == "No edges found in workflow":
36+
return {"error_type": "no_edges"}
37+
if message.startswith("Source directory not found:"):
38+
return {"error_type": "missing_source_dir"}
39+
if message == "Node missing required 'id' attribute":
40+
return {"error_type": "missing_node_id"}
41+
if message.startswith("Node '") and message.endswith(
42+
"contains unsafe shell characters"
43+
):
44+
return {"error_type": "unsafe_node_label"}
45+
if message.startswith("Node '") and "missing format 'ID:filename'" in message:
46+
return {"error_type": "invalid_node_label_format"}
47+
if message.startswith("Node '") and message.endswith("has invalid format"):
48+
return {"error_type": "invalid_node_label_format"}
49+
if message.startswith("Node '") and message.endswith("has no filename"):
50+
return {"error_type": "missing_node_filename"}
51+
if message.startswith("Node '") and message.endswith("has unusual file extension"):
52+
return {"error_type": "unusual_file_extension"}
53+
if message.startswith("Missing source file:"):
54+
return {"error_type": "missing_source_file"}
55+
if message.startswith("Node ") and message.endswith(" has no label"):
56+
return {"error_type": "missing_node_label"}
57+
if message.startswith("Error parsing node:"):
58+
return {"error_type": "node_parse_error"}
59+
if message.startswith("Duplicate node label:"):
60+
return {"error_type": "duplicate_node_label"}
61+
if message == "Edge missing source or target":
62+
return {"error_type": "missing_edge_endpoint"}
63+
if message.startswith("Edge references non-existent source node:"):
64+
return {"error_type": "missing_edge_source"}
65+
if message.startswith("Edge references non-existent target node:"):
66+
return {"error_type": "missing_edge_target"}
67+
if message == "Workflow contains cycles (expected for control loops)":
68+
return {"error_type": "cycle_detected"}
69+
if message.startswith("Invalid port number:"):
70+
return {"error_type": "invalid_port_number"}
71+
if message.startswith("Port conflict:"):
72+
return {"error_type": "port_conflict"}
73+
if message.startswith("Port ") and "is in reserved range" in message:
74+
return {"error_type": "reserved_port"}
75+
if message.startswith("File not found:"):
76+
return {"error_type": "file_not_found"}
77+
if message.startswith("Validation failed:"):
78+
return {"error_type": "validation_exception"}
79+
return {"error_type": "validation_message"}
80+
81+
82+
def _build_entries(bucket_name, messages, source_nodes):
    """Turn raw validation messages into structured entries for JSON output.

    Each entry holds the original ``message``, the type tag returned by
    ``_classify_message`` and, when the message identifies a node, a
    ``node_id`` key.

    Args:
        bucket_name: Bucket the messages belong to (``"errors"``,
            ``"warnings"`` or ``"info"``); forwarded to ``_classify_message``.
        messages: Iterable of message strings.
        source_nodes: Mapping of source filename to the id of the node that
            references it; used to attribute "Missing source file" messages.

    Returns:
        A list of dicts, one per message, in the same order as ``messages``.
    """
    node_prefix = "Node "
    no_label_suffix = " has no label"
    edge_reference_prefixes = (
        "Edge references non-existent source node:",
        "Edge references non-existent target node:",
    )

    entries = []
    for message in messages:
        entry = {"message": message}
        entry.update(_classify_message(message, bucket_name))

        if message.startswith("Missing source file:"):
            filename = message.split(":", 1)[1].strip()
            node_id = source_nodes.get(filename)
            if node_id:
                entry["node_id"] = node_id
        elif message.startswith(node_prefix) and message.endswith(no_label_suffix):
            # Slice by the real prefix/suffix lengths. A hard-coded -9 only
            # removed " no label" and left " has" glued to the node id.
            entry["node_id"] = message[len(node_prefix):-len(no_label_suffix)]
        elif message.startswith(edge_reference_prefixes):
            # The node id is everything after the first colon.
            entry["node_id"] = message.split(":", 1)[1].strip()

        entries.append(entry)
    return entries
102+
103+
104+
def _build_payload(workflow_path, source_root, errors, warnings, info, source_nodes):
    """Assemble the JSON-serialisable validation report.

    Args:
        workflow_path: Path-like object for the workflow file; only its
            ``name`` attribute is reported.
        source_root: Source directory; reported as ``str(source_root)``.
        errors: Error message strings.
        warnings: Warning message strings.
        info: Informational message strings.
        source_nodes: Mapping of source filename to node id, forwarded to
            ``_build_entries``.

    Returns:
        A dict with the workflow name, source directory, overall validity,
        the structured error/warning/info entries and a summary block.
    """
    structured = {
        "errors": _build_entries("errors", errors, source_nodes),
        "warnings": _build_entries("warnings", warnings, source_nodes),
        "info": _build_entries("info", info, source_nodes),
    }

    # Collect node ids from errors then warnings, first occurrence only,
    # keeping the order in which they were reported.
    nodes_affected = []
    for entry in (*structured["errors"], *structured["warnings"]):
        node_id = entry.get("node_id")
        if not node_id or node_id in nodes_affected:
            continue
        nodes_affected.append(node_id)

    summary = {
        "error_count": len(structured["errors"]),
        "warning_count": len(structured["warnings"]),
        "info_count": len(structured["info"]),
        "nodes_affected": nodes_affected,
    }

    payload = {
        "workflow": workflow_path.name,
        "source_dir": str(source_root),
        "valid": len(errors) == 0,
    }
    payload.update(structured)
    payload["summary"] = summary
    return payload
129+
130+
131+
def validate_workflow(workflow_file, source_dir, console, output_format="text"):
9132
workflow_path = Path(workflow_file)
10133
source_root = workflow_path.parent / source_dir
11134

12-
console.print(f"[cyan]Validating:[/cyan] {workflow_path.name}")
13-
console.print()
135+
if output_format == "text":
136+
console.print(f"[cyan]Validating:[/cyan] {workflow_path.name}")
137+
console.print()
14138

15139
errors = []
16140
warnings = []
17141
info = []
142+
source_nodes = {}
18143

19144
def finalize():
20-
show_results(console, errors, warnings, info)
145+
if output_format == "json":
146+
print(
147+
json.dumps(
148+
_build_payload(
149+
workflow_path,
150+
source_root,
151+
errors,
152+
warnings,
153+
info,
154+
source_nodes,
155+
),
156+
indent=2,
157+
)
158+
)
159+
else:
160+
show_results(console, errors, warnings, info)
21161
return len(errors) == 0
22162

23163
try:
24-
with open(workflow_path, "r") as f:
164+
with open(workflow_path, "r", encoding="utf-8") as f:
25165
content = f.read()
26166

27167
if not content.strip():
@@ -109,6 +249,7 @@ def finalize():
109249
warnings.append(f"Node '{label}' has invalid format")
110250
else:
111251
nodeId_part, filename = parts
252+
source_nodes[filename] = node_id
112253
if not filename:
113254
errors.append(f"Node '{label}' has no filename")
114255
elif not any(
@@ -177,10 +318,40 @@ def finalize():
177318
return finalize()
178319

179320
except FileNotFoundError:
180-
console.print(f"[red]Error:[/red] File not found: {workflow_path}")
321+
if output_format == "json":
322+
print(
323+
json.dumps(
324+
_build_payload(
325+
workflow_path,
326+
source_root,
327+
[f"File not found: {workflow_path}"],
328+
[],
329+
[],
330+
source_nodes,
331+
),
332+
indent=2,
333+
)
334+
)
335+
else:
336+
console.print(f"[red]Error:[/red] File not found: {workflow_path}")
181337
return False
182338
except Exception as e:
183-
console.print(f"[red]Validation failed:[/red] {str(e)}")
339+
if output_format == "json":
340+
print(
341+
json.dumps(
342+
_build_payload(
343+
workflow_path,
344+
source_root,
345+
[f"Validation failed: {str(e)}"],
346+
[],
347+
[],
348+
source_nodes,
349+
),
350+
indent=2,
351+
)
352+
)
353+
else:
354+
console.print(f"[red]Validation failed:[/red] {str(e)}")
184355
return False
185356

186357

tests/test_cli.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,44 @@ def test_validate_missing_node_file(self):
8484
self.assertNotEqual(result.exit_code, 0)
8585
self.assertIn("Missing source file", result.output)
8686

87+
def test_validate_json_output_for_valid_file(self):
    """`validate --format json` on a freshly initialised project reports success."""
    with self.runner.isolated_filesystem(temp_dir=self.temp_dir):
        init_result = self.runner.invoke(cli, ["init", "test-project"])
        self.assertEqual(init_result.exit_code, 0)

        validate_args = [
            "validate",
            "test-project/workflow.graphml",
            "--format",
            "json",
        ]
        validate_result = self.runner.invoke(cli, validate_args)
        self.assertEqual(validate_result.exit_code, 0)

        # The whole command output must be parseable as one JSON document.
        report = json.loads(validate_result.output)
        self.assertTrue(report["valid"])
        self.assertEqual(report["summary"]["error_count"], 0)
        self.assertEqual(report["workflow"], "workflow.graphml")
        self.assertIn("src", report["source_dir"])
103+
104+
def test_validate_json_output_for_missing_source_file(self):
    """A missing node script is reported as one structured JSON error."""
    with self.runner.isolated_filesystem(temp_dir=self.temp_dir):
        init_result = self.runner.invoke(cli, ["init", "test-project"])
        self.assertEqual(init_result.exit_code, 0)

        # Remove the script (if present) so validation has a file to miss.
        script_path = Path("test-project/src/script.py")
        if script_path.exists():
            script_path.unlink()

        validate_args = [
            "validate",
            "test-project/workflow.graphml",
            "--format",
            "json",
        ]
        validate_result = self.runner.invoke(cli, validate_args)
        self.assertNotEqual(validate_result.exit_code, 0)

        report = json.loads(validate_result.output)
        self.assertFalse(report["valid"])
        self.assertEqual(report["summary"]["error_count"], 1)

        first_error = report["errors"][0]
        self.assertEqual(first_error["error_type"], "missing_source_file")
        self.assertEqual(first_error["node_id"], "n1")
124+
87125
def test_status_command(self):
88126
result = self.runner.invoke(cli, ["status"])
89127
self.assertEqual(result.exit_code, 0)

0 commit comments

Comments
 (0)