|
| 1 | +import json |
1 | 2 | from pathlib import Path |
2 | 3 | from bs4 import BeautifulSoup |
3 | 4 | from rich.panel import Panel |
4 | 5 | import re |
5 | 6 | import xml.etree.ElementTree as ET |
6 | 7 |
|
7 | 8 |
|
8 | | -def validate_workflow(workflow_file, source_dir, console): |
# Ordered (prefix, suffix, infix, label) rules. An empty string disables that
# part of the test. The first matching rule wins, so the order is significant.
_INFO_RULES = (
    ("Found ", "", "node(s)", "node_count"),
    ("Found ", "", "edge(s)", "edge_count"),
    ("ZMQ-based edges:", "", "", "zmq_edges"),
    ("File-based edges:", "", "", "file_edges"),
)

# Messages that must match in full to be classified.
_EXACT_ERROR_TYPES = {
    "File is empty": "empty_file",
    "Not a valid GraphML file - missing <graphml> root element": "invalid_graphml",
    "Missing <graph> element": "missing_graph_element",
    "Graph missing required 'edgedefault' attribute": "missing_edgedefault",
    "No nodes found in workflow": "no_nodes",
    "No edges found in workflow": "no_edges",
    "Node missing required 'id' attribute": "missing_node_id",
    "Edge missing source or target": "missing_edge_endpoint",
    "Workflow contains cycles (expected for control loops)": "cycle_detected",
}

# Same tuple layout as _INFO_RULES; used for every bucket other than "info".
_ERROR_RULES = (
    ("Invalid XML:", "", "", "invalid_xml"),
    ("Invalid edgedefault value", "", "", "invalid_edgedefault"),
    ("Source directory not found:", "", "", "missing_source_dir"),
    ("Node '", "contains unsafe shell characters", "", "unsafe_node_label"),
    ("Node '", "", "missing format 'ID:filename'", "invalid_node_label_format"),
    ("Node '", "has invalid format", "", "invalid_node_label_format"),
    ("Node '", "has no filename", "", "missing_node_filename"),
    ("Node '", "has unusual file extension", "", "unusual_file_extension"),
    ("Missing source file:", "", "", "missing_source_file"),
    ("Node ", " has no label", "", "missing_node_label"),
    ("Error parsing node:", "", "", "node_parse_error"),
    ("Duplicate node label:", "", "", "duplicate_node_label"),
    ("Edge references non-existent source node:", "", "", "missing_edge_source"),
    ("Edge references non-existent target node:", "", "", "missing_edge_target"),
    ("Invalid port number:", "", "", "invalid_port_number"),
    ("Port conflict:", "", "", "port_conflict"),
    ("Port ", "", "is in reserved range", "reserved_port"),
    ("File not found:", "", "", "file_not_found"),
    ("Validation failed:", "", "", "validation_exception"),
)


def _match_rules(message, rules, default):
    """Return the label of the first rule in *rules* matching *message*.

    Falls back to *default* when no rule matches.
    """
    for prefix, suffix, infix, label in rules:
        if (
            message.startswith(prefix)
            and message.endswith(suffix)
            and infix in message
        ):
            return label
    return default


def _classify_message(message, bucket_name):
    """Map a validation message to a machine-readable type tag.

    Args:
        message: The human-readable validation message text.
        bucket_name: Name of the bucket the message belongs to. Only the
            value ``"info"`` is treated specially; any other value is
            classified with the error rules.

    Returns:
        A one-item dict: ``{"info_type": ...}`` for the ``"info"`` bucket,
        otherwise ``{"error_type": ...}``. Unrecognised messages get the
        generic tags ``"info"`` and ``"validation_message"`` respectively.
    """
    if bucket_name == "info":
        return {"info_type": _match_rules(message, _INFO_RULES, "info")}

    # Exact messages never overlap with the pattern rules, so checking them
    # first gives the same result as a single ordered chain.
    exact_type = _EXACT_ERROR_TYPES.get(message)
    if exact_type is not None:
        return {"error_type": exact_type}

    return {
        "error_type": _match_rules(message, _ERROR_RULES, "validation_message")
    }
| 80 | + |
| 81 | + |
def _build_entries(bucket_name, messages, source_nodes):
    """Turn raw validation messages into structured entry dicts.

    Each entry holds the original ``"message"``, the type tag produced by
    ``_classify_message`` and, when the message identifies a node, a
    ``"node_id"`` key.

    Args:
        bucket_name: Bucket the messages belong to; forwarded unchanged to
            ``_classify_message``.
        messages: Iterable of message strings.
        source_nodes: Mapping from source filename to node id, used to
            resolve "Missing source file:" messages.

    Returns:
        A list of entry dicts in the same order as *messages*.
    """
    no_label_prefix = "Node "
    no_label_suffix = " has no label"
    edge_reference_prefixes = (
        "Edge references non-existent source node:",
        "Edge references non-existent target node:",
    )

    entries = []
    for message in messages:
        entry = {"message": message}
        entry.update(_classify_message(message, bucket_name))

        if message.startswith("Missing source file:"):
            filename = message.split(":", 1)[1].strip()
            node_id = source_nodes.get(filename)
            # Only attach a node id when the filename is known.
            if node_id:
                entry["node_id"] = node_id
        elif message.startswith(no_label_prefix) and message.endswith(
            no_label_suffix
        ):
            # Slice by the actual prefix/suffix lengths: a hard-coded -9
            # would leave " has" attached to the node id.
            entry["node_id"] = message[
                len(no_label_prefix):-len(no_label_suffix)
            ]
        elif message.startswith(edge_reference_prefixes):
            # The node id is everything after the first colon.
            entry["node_id"] = message.split(":", 1)[1].strip()

        entries.append(entry)
    return entries
| 102 | + |
| 103 | + |
def _build_payload(workflow_path, source_root, errors, warnings, info, source_nodes):
    """Assemble the structured validation report.

    Args:
        workflow_path: Path-like object for the workflow file; only its
            ``name`` attribute is read.
        source_root: Source directory; stored as ``str(source_root)``.
        errors: List of error message strings.
        warnings: List of warning message strings.
        info: List of informational message strings.
        source_nodes: Mapping from source filename to node id, forwarded to
            ``_build_entries``.

    Returns:
        A dict with the keys ``workflow``, ``source_dir``, ``valid``,
        ``errors``, ``warnings``, ``info`` and ``summary``. ``valid`` is true
        exactly when there are no errors. ``summary["nodes_affected"]`` lists
        each node id referenced by an error or warning once, in first-seen
        order.
    """
    error_entries = _build_entries("errors", errors, source_nodes)
    warning_entries = _build_entries("warnings", warnings, source_nodes)
    info_entries = _build_entries("info", info, source_nodes)

    # dict.fromkeys de-duplicates in linear time while keeping first-seen
    # order; entries with a missing or empty node id are skipped.
    nodes_affected = list(
        dict.fromkeys(
            entry["node_id"]
            for entry in error_entries + warning_entries
            if entry.get("node_id")
        )
    )

    summary = {
        "error_count": len(error_entries),
        "warning_count": len(warning_entries),
        "info_count": len(info_entries),
        "nodes_affected": nodes_affected,
    }
    return {
        "workflow": workflow_path.name,
        "source_dir": str(source_root),
        "valid": not errors,
        "errors": error_entries,
        "warnings": warning_entries,
        "info": info_entries,
        "summary": summary,
    }
| 129 | + |
| 130 | + |
| 131 | +def validate_workflow(workflow_file, source_dir, console, output_format="text"): |
9 | 132 | workflow_path = Path(workflow_file) |
10 | 133 | source_root = workflow_path.parent / source_dir |
11 | 134 |
|
12 | | - console.print(f"[cyan]Validating:[/cyan] {workflow_path.name}") |
13 | | - console.print() |
| 135 | + if output_format == "text": |
| 136 | + console.print(f"[cyan]Validating:[/cyan] {workflow_path.name}") |
| 137 | + console.print() |
14 | 138 |
|
15 | 139 | errors = [] |
16 | 140 | warnings = [] |
17 | 141 | info = [] |
| 142 | + source_nodes = {} |
18 | 143 |
|
19 | 144 | def finalize(): |
20 | | - show_results(console, errors, warnings, info) |
| 145 | + if output_format == "json": |
| 146 | + print( |
| 147 | + json.dumps( |
| 148 | + _build_payload( |
| 149 | + workflow_path, |
| 150 | + source_root, |
| 151 | + errors, |
| 152 | + warnings, |
| 153 | + info, |
| 154 | + source_nodes, |
| 155 | + ), |
| 156 | + indent=2, |
| 157 | + ) |
| 158 | + ) |
| 159 | + else: |
| 160 | + show_results(console, errors, warnings, info) |
21 | 161 | return len(errors) == 0 |
22 | 162 |
|
23 | 163 | try: |
24 | | - with open(workflow_path, "r") as f: |
| 164 | + with open(workflow_path, "r", encoding="utf-8") as f: |
25 | 165 | content = f.read() |
26 | 166 |
|
27 | 167 | if not content.strip(): |
@@ -109,6 +249,7 @@ def finalize(): |
109 | 249 | warnings.append(f"Node '{label}' has invalid format") |
110 | 250 | else: |
111 | 251 | nodeId_part, filename = parts |
| 252 | + source_nodes[filename] = node_id |
112 | 253 | if not filename: |
113 | 254 | errors.append(f"Node '{label}' has no filename") |
114 | 255 | elif not any( |
@@ -177,10 +318,40 @@ def finalize(): |
177 | 318 | return finalize() |
178 | 319 |
|
179 | 320 | except FileNotFoundError: |
180 | | - console.print(f"[red]Error:[/red] File not found: {workflow_path}") |
| 321 | + if output_format == "json": |
| 322 | + print( |
| 323 | + json.dumps( |
| 324 | + _build_payload( |
| 325 | + workflow_path, |
| 326 | + source_root, |
| 327 | + [f"File not found: {workflow_path}"], |
| 328 | + [], |
| 329 | + [], |
| 330 | + source_nodes, |
| 331 | + ), |
| 332 | + indent=2, |
| 333 | + ) |
| 334 | + ) |
| 335 | + else: |
| 336 | + console.print(f"[red]Error:[/red] File not found: {workflow_path}") |
181 | 337 | return False |
182 | 338 | except Exception as e: |
183 | | - console.print(f"[red]Validation failed:[/red] {str(e)}") |
| 339 | + if output_format == "json": |
| 340 | + print( |
| 341 | + json.dumps( |
| 342 | + _build_payload( |
| 343 | + workflow_path, |
| 344 | + source_root, |
| 345 | + [f"Validation failed: {str(e)}"], |
| 346 | + [], |
| 347 | + [], |
| 348 | + source_nodes, |
| 349 | + ), |
| 350 | + indent=2, |
| 351 | + ) |
| 352 | + ) |
| 353 | + else: |
| 354 | + console.print(f"[red]Validation failed:[/red] {str(e)}") |
184 | 355 | return False |
185 | 356 |
|
186 | 357 |
|
|
0 commit comments