diff --git a/README.md b/README.md index 9ef672b..9fed6f4 100644 --- a/README.md +++ b/README.md @@ -306,6 +306,7 @@ jobs: - Python all compiles - The interview file is minimally correct (python code blocks compile, mako statements compile, we are using known Docassemble keys in the YAML) - Absolute URLs in `docassemble/*/data/questions` do not return HTTP 404 (excluding `example.com` links) +- PDFs in the repository (especially `docassemble/*/data/templates`) are checked for PDF/UA-1 accessibility compliance using [veraPDF](https://verapdf.org/) #### Usage @@ -327,8 +328,46 @@ jobs: ignore-urls: | https://example.com/known-flaky-endpoint https://another.example.org/blocked-from-ci + # Optional: skip PDF accessibility check entirely + skip-pdf-check: "true" + # Optional: fail the build instead of just warning on inaccessible PDFs + verapdf-validation-mode: "error" + # Optional: enforce form-field annotation structure rules (strict mode) + verapdf-strict: "true" ``` +#### Input Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `python-version` | Python version to use | `"3.12"` | +| `skip-url-check` | Skip URL checker network calls | `"false"` | +| `skip-templates` | Skip checking URLs in template files | `"false"` | +| `ignore-urls` | Comma/newline-separated absolute URLs to ignore in URL checks | `""` | +| `skip-pdf-check` | Skip PDF accessibility checking and veraPDF installation | `"false"` | +| `verapdf-validation-mode` | How to report PDF/UA-1 accessibility failures: `"warning"` annotates without failing; `"error"` fails the build | `"warning"` | +| `verapdf-strict` | Enable strict checking: `"true"` activates tab-order and form-annotation structure rules (suppressed by default because forms are often flattened before users see them) | `"false"` | + +#### PDF Accessibility Checking + +veraPDF is installed automatically and used to validate every PDF in the repository against the **PDF/UA-1** (ISO 14289-1) accessibility standard. +PDFs found under `docassemble/*/data/templates/` are checked first, followed by any other PDFs in the repository. + +Results are written to the **job summary** with per-PDF rule tables and a **warning annotation** is emitted in the action log. +Set `verapdf-validation-mode: "error"` to turn failures into build failures, or `skip-pdf-check: "true"` to disable the check entirely. + +Rules are classified into four severity levels: + +| Severity | Behaviour | Examples | +|----------|-----------|---------| +| **Fail** | Emits warning/error annotation; fails build in `error` mode | Missing structure tree, untagged content, figures without alt text, font missing ToUnicode | +| **Warning** | Always emits a warning annotation; never fails the build | Missing `dc:title`, missing language, missing `DisplayDocTitle`, advisory table/list structure | +| **Info** | Logged to console only; no annotation | Missing PDF/UA XMP identifier (`§5`), optional content config | +| **Suppressed** *(non-strict)* | Logged as suppressed; no annotation | Tab order (`§7.18.3`), widget annotation in Form tag (`§7.18.4`) | + +In **non-strict mode** (default), tab-order and form-annotation structure rules are suppressed because many tools flatten form fields before the user sees the final PDF. +Set `verapdf-strict: "true"` to treat these as failures. + ## Development Details Using [codeql-action](https://github.com/github/codeql-action) as diff --git a/da_build/action.yml b/da_build/action.yml index 63b53fb..8b80aa5 100644 --- a/da_build/action.yml +++ b/da_build/action.yml @@ -14,6 +14,22 @@ inputs: ignore-urls: description: Comma/newline-separated absolute URLs to ignore in URL checks default: "" + skip-pdf-check: + description: Skip PDF accessibility checking and veraPDF installation + default: "false" + verapdf-validation-mode: + description: >- + How to report PDF/UA-1 accessibility failures found by veraPDF. + 'warning' annotates the job without failing it (default). + 'error' fails the build. + default: "warning" + verapdf-strict: + description: >- + Enable strict PDF/UA-1 checking. + When 'false' (default), tab-order and annotation structure rules for form + fields are suppressed because forms are often flattened before users see + them. Set to 'true' to treat those rules as failures. + default: "false" runs: using: composite @@ -94,3 +110,64 @@ runs: echo "::warning title=URL checker::$escaped" fi shell: bash + + - name: Install veraPDF + run: | + if [ "${{ inputs.skip-pdf-check }}" = "true" ]; then + echo "Skipping veraPDF installation" + exit 0 + fi + + # veraPDF 1.28+ is required for compatibility with Java 21 (GitHub Actions default). + VERAPDF_VERSION="1.28.1" + VERAPDF_MINOR="1.28" + INSTALL_DIR="${RUNNER_TEMP}/verapdf" + + if command -v verapdf &>/dev/null; then + echo "veraPDF already available: $(verapdf --version 2>&1 | head -1)" + exit 0 + fi + + echo "Downloading veraPDF ${VERAPDF_VERSION}..." + wget -q \ + "https://software.verapdf.org/releases/${VERAPDF_MINOR}/verapdf-greenfield-${VERAPDF_VERSION}-installer.zip" \ + -O "${RUNNER_TEMP}/verapdf-installer.zip" + + unzip -q "${RUNNER_TEMP}/verapdf-installer.zip" -d "${RUNNER_TEMP}/verapdf-installer-src" + + cat > "${RUNNER_TEMP}/verapdf-autoinstall.xml" << EOF + + + + ${INSTALL_DIR} + + + + + + + + + + + + + EOF + + INSTALLER_JAR=$(find "${RUNNER_TEMP}/verapdf-installer-src" -name "*.jar" | head -1) + java -jar "${INSTALLER_JAR}" "${RUNNER_TEMP}/verapdf-autoinstall.xml" + echo "${INSTALL_DIR}" >> "${GITHUB_PATH}" + echo "Installed veraPDF: $("${INSTALL_DIR}/verapdf" --version 2>&1 | head -1)" + shell: bash + + - name: Check PDF accessibility with veraPDF + run: | + if [ "${{ inputs.skip-pdf-check }}" = "true" ]; then + echo "Skipping PDF accessibility check" + exit 0 + fi + python "${{ github.action_path }}/check_pdf_accessibility.py" + env: + PDF_ACCESSIBILITY_MODE: ${{ inputs.verapdf-validation-mode }} + PDF_ACCESSIBILITY_STRICT: ${{ inputs.verapdf-strict }} + shell: bash diff --git a/da_build/check_pdf_accessibility.py b/da_build/check_pdf_accessibility.py new file mode 100644 index 0000000..d84663f --- /dev/null +++ b/da_build/check_pdf_accessibility.py @@ -0,0 +1,572 @@ +#!/usr/bin/env python3 +"""Check PDFs in a docassemble repository for PDF/UA-1 accessibility compliance using veraPDF.""" + +import os +import subprocess +import sys +import xml.etree.ElementTree as ET +from pathlib import Path + +# --------------------------------------------------------------------------- +# Rule severity classification +# --------------------------------------------------------------------------- +# Severity levels: +# "info" - administrative/metadata only; logged but no annotation +# "warning" - advisory; always emits a GitHub warning annotation +# "fail" - real accessibility blocker; emits warning or error +# annotation depending on PDF_ACCESSIBILITY_MODE +# "form_annotation" - annotation structure / tab-order rules that only +# matter when forms are not flattened; treated as "fail" +# in strict mode, suppressed (logged only) otherwise +# +# Keys are (clause, test_number) tuples matching veraPDF XML attributes. +# Unmapped rules fall back to "fail" so nothing is silently swallowed. + +RULE_SEVERITY: dict[tuple[str, str], str] = { + # §5 — PDF/UA identification in XMP metadata (administrative) + ("5", "1"): "info", + ("5", "2"): "info", + ("5", "3"): "info", + ("5", "4"): "info", + ("5", "5"): "info", + # §6.1 — PDF version header syntax (technical, not user-facing) + ("6.1", "1"): "info", + # §6.2 — MarkInfo.Marked=true (document not tagged at all) + ("6.2", "1"): "fail", + # §7.1 — Content structure, metadata, and structure tree + ("7.1", "1"): "fail", # Artifact nested inside tagged content + ("7.1", "2"): "fail", # Tagged content nested inside Artifact + ("7.1", "3"): "fail", # Real content neither tagged nor Artifact + ("7.1", "4"): "warning", # Suspects=true (may generate false positives) + ("7.1", "5"): "warning", # Non-standard type not mapped to standard type + ("7.1", "6"): "fail", # Circular role map + ("7.1", "7"): "fail", # Standard type remapped + ("7.1", "8"): "warning", # No XMP metadata stream + ("7.1", "9"): "warning", # Missing dc:title in XMP + ("7.1", "10"): "warning", # Missing DisplayDocTitle viewer preference + ("7.1", "11"): "fail", # Missing StructTreeRoot (document untagged) + ("7.1", "12"): "fail", # Structure element missing parent entry + # §7.2 — Language (advisory) and table/list structure (real failures) + ("7.2", "2"): "warning", # Language for Outline entries undetermined + ("7.2", "3"): "fail", # Table has invalid child elements + ("7.2", "4"): "fail", # TR not contained in Table/THead/TBody/TFoot + ("7.2", "5"): "fail", # THead not in Table + ("7.2", "6"): "fail", # TBody not in Table + ("7.2", "7"): "fail", # TFoot not in Table + ("7.2", "8"): "fail", # TH not in TR + ("7.2", "9"): "fail", # TD not in TR + ("7.2", "10"): "fail", # TR has invalid child elements + ("7.2", "11"): "fail", # Table has more than one THead + ("7.2", "12"): "fail", # Table has more than one TFoot + ("7.2", "13"): "fail", # Table has TFoot but no TBody + ("7.2", "14"): "fail", # Table has THead but no TBody + ("7.2", "15"): "fail", # Table cells overlap + ("7.2", "16"): "fail", # Table Caption not first or last child + ("7.2", "17"): "warning", # LI not in L + ("7.2", "18"): "warning", # LBody not in LI + ("7.2", "19"): "warning", # L has invalid child elements + ("7.2", "20"): "warning", # LI has invalid child elements + ("7.2", "21"): "warning", # Language for ActualText in struct element + ("7.2", "22"): "warning", # Language for Alt in struct element + ("7.2", "23"): "warning", # Language for E attribute in struct element + ("7.2", "24"): "warning", # Language for annotation Contents + ("7.2", "25"): "warning", # Language for form field TU key + ("7.2", "26"): "warning", # TOCI not in TOC + ("7.2", "27"): "warning", # TOC has invalid child elements + ("7.2", "28"): "warning", # TOC Caption not first child + ("7.2", "29"): "warning", # Lang value not a valid Language-Tag + ("7.2", "30"): "warning", # Language for ActualText in Span + ("7.2", "31"): "warning", # Language for Alt in Span + ("7.2", "32"): "warning", # Language for E in Span + ("7.2", "33"): "warning", # Language for document metadata + ("7.2", "34"): "warning", # Language for text in page content + ("7.2", "36"): "fail", # THead has invalid child elements + ("7.2", "37"): "fail", # TBody has invalid child elements + ("7.2", "38"): "fail", # TFoot has invalid child elements + ("7.2", "39"): "fail", # Table has more than one Caption + ("7.2", "40"): "warning", # List Caption not first child + ("7.2", "41"): "fail", # Table columns span different numbers of rows + ("7.2", "42"): "fail", # Table rows span different numbers of columns + ("7.2", "43"): "fail", # Table rows span different numbers of columns (variant) + # §7.3 — Figures + ("7.3", "1"): "fail", # Figure missing Alt/ActualText + # §7.4 — Headings + ("7.4.2", "1"): "warning", # Heading level skipped + ("7.4.4", "1"): "warning", # Node contains more than one H tag + ("7.4.4", "2"): "warning", # Document mixes H and Hn tags + ("7.4.4", "3"): "warning", # Document mixes H and Hn tags + # §7.5 — Table header scope + ("7.5", "1"): "fail", # TD has no connected header + ("7.5", "2"): "fail", # TD references undefined header ID + # §7.7 — Mathematical formulae + ("7.7", "1"): "fail", # Formula missing Alt/ActualText + # §7.9 — Notes + ("7.9", "1"): "warning", # Note missing ID entry + ("7.9", "2"): "warning", # Note has non-unique ID + # §7.10 — Optional content (layers) + ("7.10", "1"): "info", + ("7.10", "2"): "info", + # §7.11 — Embedded files + ("7.11", "1"): "warning", # Embedded file spec missing F or UF key + # §7.15 — XFA forms + ("7.15", "1"): "fail", # Dynamic XFA form present + # §7.16 — Security / encryption + ("7.16", "1"): "warning", # Encryption P key missing accessibility bit + # §7.18 — Annotations and form fields + ("7.18.1", "1"): "fail", # Non-widget annotation not in Annot tag + ("7.18.1", "2"): "fail", # Non-widget annotation missing Contents/Alt + ("7.18.1", "3"): "fail", # Form field missing accessible name (TU key) + ("7.18.2", "1"): "fail", # TrapNet annotation present + ("7.18.3", "1"): "form_annotation", # Page with annotations missing Tabs=S + ("7.18.4", "1"): "form_annotation", # Widget annotation not in Form tag + ("7.18.4", "2"): "form_annotation", # Form element missing role or single widget child + ("7.18.5", "1"): "fail", # Link annotation not in Link tag + ("7.18.5", "2"): "warning", # Link annotation missing Contents description + ("7.18.6.2", "1"): "warning", # Media clip missing CT key + ("7.18.6.2", "2"): "warning", # Media clip missing Alt key + ("7.18.8", "1"): "info", # PrinterMark annotation in logical structure + # §7.20 — XObjects + ("7.20", "1"): "info", # Reference XObject (technically disallowed) + ("7.20", "2"): "warning", # Form XObject with MCIDs referenced multiple times + # §7.21 — Fonts + ("7.21.3.1", "1"): "warning", # Type0 font CIDSystemInfo mismatch + ("7.21.3.2", "1"): "warning", # Type2 CIDFont missing CIDToGIDMap + ("7.21.3.3", "1"): "warning", # Non-standard CMap not embedded + ("7.21.3.3", "2"): "warning", # Embedded CMap WMode mismatch + ("7.21.3.3", "3"): "warning", # CMap references non-standard CMap + ("7.21.4.1", "1"): "fail", # Font program not embedded + ("7.21.4.1", "2"): "fail", # Glyph missing from embedded font + ("7.21.4.2", "1"): "warning", # Type1 CharSet doesn't list all glyphs + ("7.21.4.2", "2"): "warning", # CIDFont CIDSet doesn't identify all glyphs + ("7.21.5", "1"): "warning", # Glyph width inconsistency + ("7.21.6", "1"): "warning", # Non-symbolic TrueType missing non-symbolic cmap + ("7.21.6", "2"): "warning", # Non-symbolic TrueType encoding not MacRoman/WinAnsi + ("7.21.6", "3"): "warning", # Symbolic TrueType has Encoding entry + ("7.21.6", "4"): "warning", # Symbolic TrueType cmap issue + ("7.21.7", "1"): "fail", # Glyph missing ToUnicode mapping (text unextractable) +} + + +def classify_rule(clause: str, test_number: str, strict: bool) -> str: + """Return effective severity for a failed rule given the current strict setting. + + Returns "info", "warning", "fail", or "suppressed". + """ + base = RULE_SEVERITY.get((clause, test_number), "fail") # unknown → fail + if base == "form_annotation": + return "fail" if strict else "suppressed" + return base + + +def find_pdfs(root_dir: Path) -> list[Path]: + """Find all PDFs, prioritizing docassemble/*/data/templates/ directories.""" + seen: set[Path] = set() + pdfs: list[Path] = [] + + # Priority: docassemble/*/data/templates/ (standard Assembly Line template location) + for pdf in sorted(root_dir.glob("docassemble/*/data/templates/**/*.pdf")): + key = pdf.resolve() + if key not in seen: + seen.add(key) + pdfs.append(pdf) + + # All other PDFs in the repository + for pdf in sorted(root_dir.rglob("*.pdf")): + key = pdf.resolve() + if key not in seen: + seen.add(key) + pdfs.append(pdf) + + return pdfs + + +def run_verapdf(pdfs: list[Path], verapdf_cmd: str = "verapdf") -> tuple[str, str]: + """Run veraPDF on a list of PDFs; return (stdout_xml, stderr).""" + cmd = [verapdf_cmd, "--flavour", "ua1", "--format", "xml"] + [str(p) for p in pdfs] + result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) + return result.stdout, result.stderr + + +def parse_results(xml_output: str) -> list[dict]: + """Parse veraPDF XML output and return a list of per-PDF result dicts.""" + try: + root = ET.fromstring(xml_output) + except ET.ParseError as e: + return [{"pdf": "unknown", "compliant": False, "parse_error": str(e), "failed_rules": []}] + + results = [] + for job in root.findall(".//job"): + item = job.find("item") + name_el = item.find("name") if item is not None else None + pdf_name = name_el.text or "unknown" if name_el is not None else "unknown" + + task_exc = job.find("taskException") + if task_exc is not None: + msg_el = task_exc.find("exceptionMessage") + msg = (msg_el.text or "Unknown error").strip() if msg_el is not None else "Unknown error" + results.append({ + "pdf": pdf_name, + "compliant": False, + "exception": msg, + "failed_rules": [], + }) + continue + + report = job.find("validationReport") + if report is None: + results.append({ + "pdf": pdf_name, + "compliant": False, + "exception": "No validation report in veraPDF output", + "failed_rules": [], + }) + continue + + is_compliant = report.get("isCompliant", "false").lower() == "true" + failed_rules = [] + + if not is_compliant: + for rule in report.findall(".//rule[@status='failed']"): + desc_el = rule.find("description") + description = desc_el.text.strip() if desc_el is not None and desc_el.text else "" + clause = rule.get("clause", "") + test_number = rule.get("testNumber", "") + failed_rules.append({ + "specification": rule.get("specification", ""), + "clause": clause, + "test_number": test_number, + "description": description, + "failed_checks": int(rule.get("failedChecks", 0)), + # base_severity is set here; effective severity depends on strict mode + "base_severity": RULE_SEVERITY.get((clause, test_number), "fail"), + }) + + results.append({ + "pdf": pdf_name, + "compliant": is_compliant, + "failed_rules": failed_rules, + }) + + return results + + +def emit_annotation(level: str, title: str, message: str) -> None: + """Emit a GitHub Actions workflow command annotation.""" + encoded = message.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A") + print(f"::{level} title={title}::{encoded}", flush=True) + + +def write_summary(results: list[dict], strict: bool) -> None: + """Append a Markdown section to the GitHub Actions job summary.""" + summary_path = os.environ.get("GITHUB_STEP_SUMMARY") + if not summary_path: + return + + total = len(results) + lines = ["## PDF Accessibility Check (PDF/UA-1)", ""] + + if total == 0: + lines.append("_No PDFs found in repository._") + with open(summary_path, "a", encoding="utf-8") as f: + f.write("\n".join(lines) + "\n") + return + + # Classify every failed rule with its effective severity + def effective_rules(result: dict) -> dict[str, list[dict]]: + """Return rules bucketed by effective severity for a single result.""" + buckets: dict[str, list[dict]] = {"fail": [], "warning": [], "info": [], "suppressed": []} + for rule in result.get("failed_rules", []): + sev = classify_rule(rule["clause"], rule["test_number"], strict) + buckets[sev].append(rule) + return buckets + + # Count PDFs with at least one fail-level issue + fail_count = sum( + 1 for r in results + if not r.get("compliant") and ( + r.get("exception") or r.get("parse_error") + or any( + classify_rule(rule["clause"], rule["test_number"], strict) == "fail" + for rule in r.get("failed_rules", []) + ) + ) + ) + warn_only_count = sum( + 1 for r in results + if not r.get("compliant") and not r.get("exception") and not r.get("parse_error") + and not any( + classify_rule(rule["clause"], rule["test_number"], strict) == "fail" + for rule in r.get("failed_rules", []) + ) + and any( + classify_rule(rule["clause"], rule["test_number"], strict) == "warning" + for rule in r.get("failed_rules", []) + ) + ) + + if fail_count == 0 and warn_only_count == 0: + lines.append(f"✅ All {total} PDF(s) passed PDF/UA-1 accessibility checks.") + elif fail_count > 0: + lines.append( + f"❌ **{fail_count} of {total} PDF(s) have accessibility failures** " + f"that require attention." + ) + if warn_only_count > 0: + lines.append(f"⚠️ {warn_only_count} additional PDF(s) have advisory warnings only.") + else: + lines.append(f"⚠️ **{warn_only_count} of {total} PDF(s) have advisory warnings.**") + + if strict: + lines.append( + "_Strict mode enabled: tab-order and form-annotation structure rules are active._" + ) + else: + lines.append( + "_Non-strict mode: tab-order and form-annotation structure rules are suppressed " + "(forms may be flattened before users see them). Enable with `verapdf-strict: true`._" + ) + lines.append("") + + def _render_pdf_section(result: dict, buckets: dict[str, list[dict]]) -> list[str]: + out = [] + pdf_path = result["pdf"] + pdf_name = Path(pdf_path).name + out.append(f"#### `{pdf_name}`") + if pdf_name != pdf_path: + out.append(f"_Path: `{pdf_path}`_") + out.append("") + + if result.get("exception"): + out.append(f"> ❌ **Error:** {result['exception']}") + out.append("") + return out + if result.get("parse_error"): + out.append(f"> ❌ **Parse error:** {result['parse_error']}") + out.append("") + return out + + def _rule_table(rule_list: list[dict]) -> list[str]: + rows = ["| Rule | Description | Occurrences |", "|------|-------------|-------------|"] + for rule in rule_list: + spec = rule["specification"] + clause = rule["clause"] + test = rule["test_number"] + ref = f"{spec} §{clause}" + (f".{test}" if test else "") + desc = rule["description"].replace("|", "\\|") + rows.append(f"| `{ref}` | {desc} | {rule['failed_checks']} |") + return rows + + if buckets["fail"]: + out.extend(_rule_table(buckets["fail"])) + out.append("") + if buckets["warning"]: + out.append("
Advisory warnings") + out.append("") + out.extend(_rule_table(buckets["warning"])) + out.append("") + out.append("
") + out.append("") + if buckets["suppressed"]: + out.append( + f"
Suppressed (form annotation / tab order) " + f"— {len(buckets['suppressed'])} rule(s)" + ) + out.append("") + out.extend(_rule_table(buckets["suppressed"])) + out.append("") + out.append("
") + out.append("") + if buckets["info"]: + out.append( + f"
Informational — {len(buckets['info'])} rule(s)" + ) + out.append("") + out.extend(_rule_table(buckets["info"])) + out.append("") + out.append("
") + out.append("") + return out + + # Failing PDFs first + failing_pdfs = [ + r for r in results + if not r.get("compliant") and ( + r.get("exception") or r.get("parse_error") + or any( + classify_rule(rule["clause"], rule["test_number"], strict) == "fail" + for rule in r.get("failed_rules", []) + ) + ) + ] + if failing_pdfs: + lines.append("### ❌ Accessibility Failures") + lines.append("") + for result in failing_pdfs: + buckets = effective_rules(result) + lines.extend(_render_pdf_section(result, buckets)) + + # Advisory-only PDFs + warn_only_pdfs = [ + r for r in results + if not r.get("compliant") and not r.get("exception") and not r.get("parse_error") + and not any( + classify_rule(rule["clause"], rule["test_number"], strict) == "fail" + for rule in r.get("failed_rules", []) + ) + and any( + classify_rule(rule["clause"], rule["test_number"], strict) in ("warning", "suppressed") + for rule in r.get("failed_rules", []) + ) + ] + if warn_only_pdfs: + lines.append("### ⚠️ Advisory Warnings Only") + lines.append("") + for result in warn_only_pdfs: + buckets = effective_rules(result) + lines.extend(_render_pdf_section(result, buckets)) + + # Passing PDFs + passing = [r for r in results if r.get("compliant")] + if passing: + lines.append("### ✅ Passing PDFs") + lines.append("") + for r in passing: + lines.append(f"- ✅ `{Path(r['pdf']).name}`") + lines.append("") + + with open(summary_path, "a", encoding="utf-8") as f: + f.write("\n".join(lines) + "\n") + + +def main() -> int: + verapdf_cmd = os.environ.get("VERAPDF_CMD", "verapdf") + failure_mode = os.environ.get("PDF_ACCESSIBILITY_MODE", "warning") + strict = os.environ.get("PDF_ACCESSIBILITY_STRICT", "false").lower() in ("true", "1", "yes") + root_dir = Path(os.environ.get("GITHUB_WORKSPACE", ".")) + + mode_label = f"mode={failure_mode}, {'strict' if strict else 'non-strict'}" + + try: + subprocess.run([verapdf_cmd, "--version"], capture_output=True, timeout=30, check=True) + except (FileNotFoundError, subprocess.CalledProcessError, subprocess.TimeoutExpired) as exc: + emit_annotation( + "error", + "PDF Accessibility", + f"veraPDF not found or failed to start ({exc}). Skipping accessibility check.", + ) + return 0 # Don't fail the build if veraPDF itself can't run + + pdfs = find_pdfs(root_dir) + if not pdfs: + print("No PDFs found in repository. Skipping accessibility check.", flush=True) + write_summary([], strict) + return 0 + + print(f"Checking {len(pdfs)} PDF(s) for PDF/UA-1 accessibility ({mode_label}):", flush=True) + for pdf in pdfs: + try: + rel = pdf.relative_to(root_dir) + except ValueError: + rel = pdf + print(f" {rel}", flush=True) + + xml_output, stderr = run_verapdf(pdfs, verapdf_cmd) + if stderr: + for line in stderr.splitlines(): + if line.strip(): + print(f" [verapdf] {line}", flush=True) + + if not xml_output.strip(): + emit_annotation( + "error", + "PDF Accessibility", + "veraPDF produced no output. Check that veraPDF installed correctly.", + ) + return 0 + + results = parse_results(xml_output) + total = len(results) + + # Per-PDF console summary + print("", flush=True) + for result in results: + name = Path(result["pdf"]).name + if result.get("compliant"): + print(f" ✓ {name}: compliant", flush=True) + elif result.get("exception") or result.get("parse_error"): + msg = result.get("exception") or result.get("parse_error") + print(f" ✗ {name}: error — {msg}", flush=True) + else: + buckets: dict[str, int] = {"fail": 0, "warning": 0, "info": 0, "suppressed": 0} + for rule in result["failed_rules"]: + sev = classify_rule(rule["clause"], rule["test_number"], strict) + buckets[sev] += 1 + parts = [] + if buckets["fail"]: + parts.append(f"{buckets['fail']} failure(s)") + if buckets["warning"]: + parts.append(f"{buckets['warning']} warning(s)") + if buckets["suppressed"]: + parts.append(f"{buckets['suppressed']} suppressed") + if buckets["info"]: + parts.append(f"{buckets['info']} info") + print(f" ✗ {name}: {', '.join(parts) if parts else 'non-compliant'}", flush=True) + + write_summary(results, strict) + + # Collect PDFs with real failures (fail-severity violations) + failing = [ + r for r in results + if not r.get("compliant") and ( + r.get("exception") or r.get("parse_error") + or any( + classify_rule(rule["clause"], rule["test_number"], strict) == "fail" + for rule in r.get("failed_rules", []) + ) + ) + ] + # Collect PDFs with advisory warnings only + warn_only = [ + r for r in results + if not r.get("compliant") and not r.get("exception") and not r.get("parse_error") + and not any( + classify_rule(rule["clause"], rule["test_number"], strict) == "fail" + for rule in r.get("failed_rules", []) + ) + and any( + classify_rule(rule["clause"], rule["test_number"], strict) == "warning" + for rule in r.get("failed_rules", []) + ) + ] + + if failing: + failing_names = ", ".join(Path(r["pdf"]).name for r in failing) + total_violations = sum( + sum(1 for rule in r.get("failed_rules", []) + if classify_rule(rule["clause"], rule["test_number"], strict) == "fail") + for r in failing + ) + annotation_msg = ( + f"{len(failing)} of {total} PDF(s) have PDF/UA-1 accessibility failures: " + f"{failing_names}. {total_violations} rule violation(s). " + "See the job summary for details." + ) + level = "error" if failure_mode == "error" else "warning" + emit_annotation(level, "PDF Accessibility (PDF/UA-1)", annotation_msg) + if failure_mode == "error": + return 1 + + elif warn_only: + warn_names = ", ".join(Path(r["pdf"]).name for r in warn_only) + emit_annotation( + "warning", + "PDF Accessibility (PDF/UA-1)", + f"{len(warn_only)} PDF(s) have advisory accessibility warnings: {warn_names}. " + "See the job summary for details.", + ) + + return 0 + + +if __name__ == "__main__": + sys.exit(main())