diff --git a/Cargo.lock b/Cargo.lock index 6aee6162e..a1aa83f58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3812,6 +3812,7 @@ dependencies = [ "anyhow", "async-trait", "base64 0.22.1", + "clap", "flate2", "glob", "hashlink", diff --git a/claude-notes/plans/2026-05-06-attribution-pipeline-flow-v2.svg b/claude-notes/plans/2026-05-06-attribution-pipeline-flow-v2.svg new file mode 100644 index 000000000..6551048bd --- /dev/null +++ b/claude-notes/plans/2026-05-06-attribution-pipeline-flow-v2.svg @@ -0,0 +1,320 @@ + + + + + + + + + + + + + + + + + Attribution pipeline — data flows (v2, post-implementation) + Updates v1 to match the shipped code: writer-side lookup is a SourceInfo-pointer-keyed HashMap, not a slice; format_options is a struct (not enum); pampa writers carry their own AttributionRecord types. + + + + + + CLI input + + WASM input + + Provider + + RenderContext + + Existing stage + + ★ New stage + + On-the-wire data + + Writer + + Output + + data hand-off + + skip / fallthrough + + + + 1. Inputs (mutually exclusive per render invocation) + + + + CLI / native render (binary is q2) + $ q2 render doc.qmd --to html --attribution=git + RenderArgs.attribution: Option<AttributionMode> (None | Git | Off) · crates/quarto/src/commands/render.rs:70 + YAML alt: top-level attribution: git | off | false (key absent ⇒ None) + Resolution: CLI Some wins → else YAML → else Off (silent override) + + + + + Hub-client / WASM (q2-debug only in v1) + parse_qmd_to_ast_with_attribution(content, attributionJson?: string) + parse_qmd_to_ast(content) is preserved as a thin wrapper ⇒ delegates with None + useAttribution.ts replays Automerge incrementally → RLE runs + IdentityMap; ships { runs:[…], identities:{…} } JSON over the WASM boundary + Direct-invokes ★ Attribution{Generate,Render} after 3-stage parse — q2-debug excludes AstTransformsStage (lib.rs:973-1000) + + + + 2. 
Providers (impl AttributionSourceProvider — single method build(ctx) → AttributionData; producer invariant: every actor in runs has an identities entry) + + + + GitBlameProvider (native only) + spawn ctx.binaries.git → git blame --porcelain <file> + parses porcelain → AttributionData { runs, identities } (synthesises identities per author): + email → (mail-local-part, actor_color(fnv1a_hex8(email))) // satisfies producer invariant + Graceful degradation: missing git OR not in working tree + ⇒ DiagnosticMessage warning + empty AttributionData (render still succeeds) + + + + + PreBuiltAttributionProvider (WASM) + serde_json::from_str(&attribution_json)? → TransportAttributionData → AttributionDataBuilder → AttributionData + Trivial provider — no replay in Rust; runs + identities both computed in JS + JSON is transport-only — never lands in ast.meta; builder restores the Arc<str> interning invariant after decode + TS replay fills identities: Automerge metadata when present, else (actor[..8], actorColor(fnv1aHex8(actor))) — invariant holds at wire + Avoids automerge-rs in WASM bundle (~hundreds of KB); no git, no shell-out + + + + + + + 3. Wire-up on RenderContext (two new fields — opt-in signal + sidecar — plus BinaryDependencies.git and ctx.format_options for §7) + + + + RenderContext · crates/quarto-core/src/render.rs:271, :284 + attribution_provider: Option<Arc<dyn AttributionSourceProvider>> ← set from §1 input + attribution_data: Option<Arc<AttributionData>> ← populated by §4, read by §6 + Both default None ⇒ feature off ⇒ pipeline byte-identical to today. Also new: BinaryDependencies.git (graceful-degrade) · RenderContext.format_options (§7). + + + + + + + 4. 
Navigation Phase (pipeline.rs:721-873; ★ AttributionGenerate pushed at pipeline.rs:873) + + + pure generates → listings sub-block (mixed gen+render) → navigation renders → ★ AttributionGenerate as the phase tail + + + → pure generates sub-block: + + + + + TocGenerateTransform + + NavbarGenerateTransform + + SidebarGenerateTransform + + PageNavGenerateTransform + + FooterGenerateTransform + + + + + + + + + → listings sub-block (post-#169): ListingGenerate · ListingRender · CategoriesSidebar · ListingFeedStage (native) / ListingFeedLink — orthogonal to attribution; does not read ctx.attribution_data. + + + → renders sub-block: + + + + + TocRenderTransform + + NavbarRenderTransform + + SidebarRenderTransform + + PageNavRenderTransform + + FooterRenderTransform + + + + + + ★ AttributionGenerateTransform (new — last entry in the Navigation Phase, after every navbar/sidebar/listings/footer/feeds transform) + name() = "attribution-generate" · pushed at pipeline.rs:873 (after FooterRenderTransform), immediately before LinkRewriteTransform opens the Finalization Phase + + if !format_supports_attribution(ctx.format) { return Ok(()); } // PDF / typst / native: no writer hook + if is_feature_disabled(&ast.meta, "attribution") { return Ok(()); } + let Some(provider) = ctx.attribution_provider.clone() else { return Ok(()); }; + // (no "user runs override" rule — users don't author byte-range tuples in YAML) + + // run provider, merge user identities, set sidecar: + let mut data = provider.build(ctx)?; // AttributionData { runs, identities } + data.identities = merge_user_over(from_config_value(&ast.meta), data.identities); // user wins on collision + ctx.attribution_data = Some(Arc::new(data)); // ⇒ §5 sidecar (NOT ast.meta) + Reads ctx.attribution_provider (§3) + user-authored meta.attribution.identities only. Writes ctx.attribution_data (§5). End-of-Navigation-Phase keeps it grouped with other *-generate work. 
+ + + + + any skip ⇒ no + provider call · + ctx.attribution_data + stays None · + user meta untouched + + + 5. ctx.attribution_data :: typed sidecar contract between §4 generate and §6 render (NOT in ast.meta — see plan §"Why a sidecar, not meta.attribution") + + + + ctx.attribution_data :: Option<Arc<AttributionData>> (typed sidecar — ~20× smaller per run than a ConfigValue::Map round-trip would be) + // in-memory shape — Rust struct on RenderContext. Serde derives exist solely for the WASM transport boundary (§1, PreBuilt path). + AttributionData { runs: AttributionMap (Vec<AttributionRun { start, end, actor: Arc<str>, time }>), + identities: IdentityMap (HashMap<Arc<str>, Identity { display_name, color }>) } // Arc keys ⟂ run actors (interned) + User-authored `meta.attribution.identities` (small ConfigValue::Map from YAML) is read by §4 and merged in (user wins on collision). v2 multi-file: just changes `runs` field type to HashMap<PathBuf, AttributionMap> — no wire-format gymnastics. + + + + + writes ctx.attribution_data + + + 6. Finalization Phase (★ AttributionRender pushed at pipeline.rs:895) + + + order: LinkRewrite · AppendixStructure · CrossrefRender · ResourceCollector · ★ AttributionRender (last) + + + … earlier finalization stages … + + + ResourceCollectorTransform + + + + ★ AttributionRenderTransform (very last transform in the HTML pipeline; also direct-invoked by the q2-debug WASM path) + name() = "attribution-render" · reads ctx.attribution_data · builds a pointer-keyed lookup map + walk-order slice + actors table · no AST mutation + + let Some(data) = ctx.attribution_data.clone() else { return Ok(()); }; // sidecar — None ⇒ no-op + + // 1. Build actors table up-front from every distinct actor in data.runs. ≤K identity resolves, ≤K warnings — not N. 
+ let mut actors: IdentityMap = IdentityMap::new(); + for run in data.runs.as_slice() { + if actors.contains_key(&run.actor) { continue; } + let identity = data.identities.get(&run.actor).cloned().unwrap_or_else(|| { ctx.diagnostics.push(warn!(…)); UNKNOWN_IDENTITY }); } + + // 2. Single AST walk: query each node's SourceInfo and populate slice + by_node map. + let mut slice: Vec<Option<AttributionRecord>> = Vec::new(); let mut by_node: HashMap<usize, AttributionRecord> = HashMap::new(); + walk(ast, |source_info| { let r = query(source_info, &data.runs); if let Some(r)=&r { by_node.insert(source_info as *const _ as usize, r.clone()); } slice.push(r); }); + + // 3. Six direct assignments — no set_attribution() method. html + json each get all three artefacts; the Arcs are shared. + ctx.format_options.{html,json}.{attribution_lookup, attribution_by_node, attribution_identities|attribution_actors} = Some(Arc::clone(…)); + + + + + + + + reads ctx.attribution_data + + + query() chain-resolves SourceInfo via local resolve_byte_range (Original / Substring only; Concat & FilterProvenance ⇒ None) then enforces the v1 single-doc invariant: file_id ≠ 0 ⇒ skip (no map_offset / SourceContext needed). + + + 7. 
Carrier on RenderContext.format_options (struct, not enum — both sub-blocks populated unconditionally so the transform stays format-agnostic) + + + + struct FormatOptions { pub html: HtmlFormatOptions, pub json: JsonFormatOptions } · render.rs:44-94 + HtmlFormatOptions { attribution_lookup: Option<Arc<[Option<AttributionRecord>]>>, attribution_by_node: Option<Arc<HashMap<usize, AttributionRecord>>>, attribution_identities: Option<Arc<IdentityMap>> } + JsonFormatOptions { attribution_lookup: Option<Arc<[…]>>, attribution_by_node: Option<Arc<HashMap<usize, AttributionRecord>>>, attribution_actors: Option<Arc<IdentityMap>> } + attribution_lookup survives as a regression invariant ("non-empty when attribution is on"); writers read attribution_by_node for O(1) lookup keyed by &SourceInfo as *const _ as usize. + + + + six direct assignments (3 per sub-block; the Arcs are shared) + + + 8. Writer dispatch (pampa-local record types; only by_node + identities/actors cross the boundary — the slice does NOT reach the writer) + + + + pampa::writers::html::HtmlConfig · writers/html.rs:51,58 + attribution_by_node: Option<Arc<HashMap<usize, HtmlAttributionRecord>>> + attribution_identities: Option<Arc<HashMap<Arc<str>, HtmlAttributionIdentity>>> + get_block_attribution / get_inline_attribution: by_node.get(&source_info ptr) ⇒ data-attr-actor · data-attr-time · data-attr-name · data-attr-color + Coalesces contiguous same-(actor,time) PROSE inlines into one outer wrapper; structured inlines (Code/Emph/Link/Span/Math) get their own wrappers. data-attr-* ⟂ data-sid/data-loc. + + + + + pampa::writers::json::JsonConfig · writers/json.rs:57,62 + attribution_by_node: Option<Arc<HashMap<usize, JsonAttributionRecord>>> + attribution_actors: Option<Arc<HashMap<Arc<str>, JsonAttributionIdentity>>> + maybe_record_attribution_for(source_info, s_id): by_node.get(ptr) ⇒ push { s, actor, time } into astContext.attribution; actors emitted once as astContext.attributionActors. 
+ skip_serializing_if on both fields ⇒ both keys absent when off (byte-identical to today's output). + + + Boundary translation at writer-config construction: quarto_core::attribution::{AttributionRecord, Identity} → Html/Json{AttributionRecord, AttributionIdentity}. Pampa keeps its own types so the crate stays free of quarto-core dependencies. + + + + + + 9. Outputs (consumers — wire shape unchanged from v1) + + + + HTML body → browser / static viewer + <span data-attr-actor=… data-attr-time=… data-attr-name=… data-attr-color=…> (coalesced prose run) </span> + + + + + AST JSON → hub-client ReactAstDebugRenderer + astContext.attribution: [{s, actor, time}, …] astContext.attributionActors: { actor: {name, color} } + + + + + diff --git a/claude-notes/plans/2026-05-06-attribution-pipeline.md b/claude-notes/plans/2026-05-06-attribution-pipeline.md new file mode 100644 index 000000000..b3a5064c2 --- /dev/null +++ b/claude-notes/plans/2026-05-06-attribution-pipeline.md @@ -0,0 +1,2340 @@ +# Attribution Pipeline (Rust port of `feat/node-attribution`) + +## Overview + +Port the per-node authorship feature prototyped on `feat/node-attribution` from +hub-client TypeScript into the q2 Rust render pipeline. The TS prototype works +end-to-end against the AST debug view but lives entirely above the WASM +boundary. Moving it down has two payoffs: + +1. **All renderers, not just q2-debug** can consume attribution. HTML, slides, + future Typst/PDF — anything that owns a writer can opt in. +2. **Both inputs (Automerge live history; `git blame --porcelain`) feed one + canonical form**. The CLI and the editor stop diverging on what + "attribution" even means. 
+ +The shape mirrors the navigation stages already in +`crates/quarto-core/src/transforms/`: + +| Concept | Navbar | Attribution | +|-------------------|----------------------------------------|---------------------------------------------| +| Generate stage | `NavbarGenerateTransform` | `AttributionGenerateTransform` | +| Stage name | `"navbar-generate"` | `"attribution-generate"` | +| Render stage | `NavbarRenderTransform` | `AttributionRenderTransform` | +| Stage name | `"navbar-render"` | `"attribution-render"` | +| Generate output | `meta.navigation.navbar` (ConfigValue) | `ctx.attribution_data` (sidecar `Arc`) — see "Why a sidecar, not `meta.attribution`" below | +| Render output | `meta.rendered.navigation.navbar` (HTML)| AST mutation / writer-config side-channel | +| Opt-in | `navbar:` YAML key + ProjectIndex | `--attribution=` CLI; `attribution:` YAML (hub-client uses a separate WASM entry point, not this flag) | +| Skip predicate | `is_feature_disabled(meta, "navbar")` | New `attribution_source_for(ctx, meta)` returns `None` | + +**User-authored `identities` still live in `meta.attribution.identities`** — +that's just normal YAML config (small, user-overridable), read by Generate +during the merge step. What does NOT live in meta is the bulk `runs` data; +see "Why a sidecar, not `meta.attribution`" below for the rationale. + +The two attribution stages bracket the Finalization Phase: + +- **`AttributionGenerateTransform`** is registered as the **last entry + in the Navigation Phase** (`crates/quarto-core/src/pipeline.rs:780-847` + as of #169), immediately after `FooterRenderTransform` (currently + line 847). It runs after every navbar/sidebar/page-nav/footer/ + listings/feeds/categories transform has finished — i.e., after *all* + the website transforms — so all `navigation.*` metadata is fully + populated by the time it writes `ctx.attribution_data`. 
+- **`AttributionRenderTransform`** is registered as the **last + transform in the Finalization Phase** (lines 849-860), immediately + after `ResourceCollectorTransform` (currently line 860). It runs + last in the pipeline, just before the writer is invoked. + +This means the entire Finalization Phase (`LinkRewriteTransform` → +`AppendixStructureTransform` → `CrossrefRenderTransform` → +`ResourceCollectorTransform`) runs *between* generate and render — +which is fine: none of those transforms read or write +`ctx.attribution_data`, and none mutates `SourceInfo` in ways that +would invalidate the per-node byte ranges attribution-render later +queries. + +We considered an earlier draft that slotted `AttributionGenerateTransform` +inside the Navigation Phase between `FooterGenerateTransform` and +`ListingGenerateTransform` — i.e., **inside** the phase rather than at +its tail. That mid-phase placement was rejected: attribution isn't a +navigation concern, doesn't read or write the `navigation.*` subtree, +and doesn't interact with sidebars/footers/TOCs/listings, so +interleaving it with the navigation generates suggests a coupling that +doesn't exist. End-of-Navigation-Phase placement keeps the stage with +other `*-generate` work without putting it in the middle of unrelated +stages. + +We also considered pairing the two attribution stages back-to-back at +the very tail of the Finalization Phase. That would have given a +tighter visual pairing (easy to find both in `pipeline.rs`) but +forfeited a real benefit of the end-of-Navigation-Phase placement: +`AttributionGenerateTransform` is a `*-generate` stage and reads +naturally with the other generate transforms. Leaving the entire +Finalization Phase between generate and render is also fine — no +transform there reads or writes `ctx.attribution_data` — so the +placement is free. + +The profile checkpoint (`DocumentProfileStage`) is read-only and runs +much earlier; attribution does not need to participate in it for v1. 
+ +`quarto trace view` discovers stages via `AstTransform::name()` (see +`crates/quarto-core/src/transform.rs:69-90` and the call site in +`TransformPipeline::execute` at line 154-159), so once the two new transforms +register their names they appear in the existing trace UI without any other +changes. + +## Branch context + +**This plan is implemented on a new feature branch.** Suggested name: +`feat/attribution-pipeline` (the implementer may choose differently +when work begins, but pinning a suggestion here means day-one commits +don't need retroactive renaming). Wherever the plan below refers to +"the implementation branch," that's the branch you create on day one. + +**Fork point: `main`.** The implementation branch is forked off `main`, +*not* off `feat/node-attribution`. The TS prototype on +`feat/node-attribution` is **reference material only** — the +implementation branch starts clean, and Phase 5 builds the minimum +producer-side TS the new WASM entry point needs, using the prototype as +a design reference. None of the prototype's consumer-side machinery +(`useNodeAttributionResolver`, in-process query/cache code paths, +`getNodeAttribution` calls in `ReactAstDebugRenderer.tsx`, etc.) is +brought over — that's exactly what the Rust pipeline replaces, so it +would be net-deleted work to import it. + +**Wherever this plan names a TS file or symbol from the prototype, that +reference is to `feat/node-attribution` and is for design reference.** +The implementation branch should NOT contain those files at fork time +(they don't exist on `main`); whatever ends up on the implementation +branch is built fresh, drawing on the prototype's algorithm and data +shape (cherry-pick, rewrite, or selectively port — implementer's +choice). References to existing Rust code (everything in `crates/`) +target the implementation branch directly and should track `main` via +rebase as the branch progresses. 
+ +**Landing strategy.** The implementation branch lands as a single PR to +`main`, delivering the Rust attribution pipeline together with the +minimum TS producer that feeds it. The `feat/node-attribution` branch +is left untouched — once this plan ships, the prototype branch is +historical reference and can be archived/deleted separately at the +user's discretion. + +## Vocabulary + +- **Actor** — opaque string identifying who made an edit. From Automerge: + the Automerge actor ID. From git: the author email. The pipeline never + interprets the value beyond hashing/string-slicing; identity is + supplied by providers (every provider populates an `IdentityMap` + entry for each actor it produces in `runs` — see Phase 6's producer + invariant), merged with user override in + `AttributionGenerateTransform`, and read by `AttributionRenderTransform`. + The render-side warning path (diagnostic + `UNKNOWN_IDENTITY`/`#888888` + placeholder) handles producer-invariant violations and does not + fire on happy paths. +- **AttributionRun** — `{ start: usize, end: usize, actor: Arc<str>, time: i64 }`, + byte offsets in *the document's primary file's* source bytes. Sorted, + non-overlapping, contiguous. `actor` is `Arc<str>` (not `String`) so the + same Arc is shared across every run by the same author — for a doc with + 5 contributors and 1000 runs this is 5 string allocations + 1000 cheap + pointer clones, not 1000 string allocations. +- **AttributionMap** — a transparent newtype around `Vec<AttributionRun>` + for the document being rendered. **Single-document only in v1**; no + file keying. The in-memory queryable form (binary search via + `AttributionSource`). v2 (multi-file via includes) replaces the field + type with a path-keyed map; see Open Question #2. +- **IdentityMap** — `HashMap<Arc<str>, Identity>` (keyed by the same + `Arc<str>` used in `AttributionRun.actor`) where + `Identity = { display_name: String, color: String }`.
The merged result + of `meta.attribution.identities` (user override) ∪ provider-supplied + identities (from Automerge actor metadata or git author-mail). Built + by `AttributionGenerateTransform`; consumed by `AttributionRenderTransform`. + Empty when no source supplied identities; unmapped actors fall back to + `actor[..8]` plus a deterministic palette hash at render time. +- **AttributionData** — `{ runs: AttributionMap, identities: IdentityMap }`. + The canonical in-memory shape, held as `Arc<AttributionData>` on + `RenderContext.attribution_data` (the sidecar). **Not** a wire form — + not stored in `ast.meta`, not serialized to ConfigValue. The sole + exception is the WASM boundary, where hub-client ships a JSON-encoded + `AttributionData` to `parse_qmd_to_ast_with_attribution` (Phase 3b); + that JSON is parsed and dropped into the sidecar immediately, + never visiting `ast.meta`. +- **`meta.attribution.identities` (user input)** — the small, + user-authored override map at the conventional YAML location, parsed + into a `ConfigValue::Map` by the existing YAML-to-meta pipeline. Read + by `AttributionGenerateTransform` during the merge step. Stays in meta + as user input; the canonical merged form lives on the sidecar. +- **AttributionSource** (Rust trait) — identical role to the TS + `AttributionSource`: a `query_byte_range(start, end) -> Option<{actor, time}>` + function (no `file_id` parameter — v1 is single-doc). Implemented for + `AttributionMap` (i.e. `Vec<AttributionRun>`) via binary search; + `AttributionGenerateTransform` builds an `AttributionData` and stores + `Arc<AttributionData>` on `ctx.attribution_data`. v2 re-introduces a + file parameter when multi-file blame ships. + +### Why a sidecar, not `meta.attribution` + +The plan originally proposed storing `AttributionData` at `meta.attribution` +as a `ConfigValue::Map`, by analogy with the navbar/footer/sidebar pattern. +That analogy breaks on volume.
+ +`AttributionRun` records scale with document length (typical: ~1 run per +~100 bytes of prose after RLE coalescing, so ~500 runs for a 50 KB +chapter, ~10K for a book-length doc). One run as a `ConfigValue::Map` +costs ~600–800 B (outer Map wrapper + 4 entries × `ConfigMapEntry`, +each with its own `SourceInfo`/`MergeOp`/`ConfigValueKind`); the same +run as `AttributionRun` is ~40 B. That is a **~20× memory multiplier +on the hottest data structure in the render pipeline**, and the cost +lands repeatedly — every Finalization-Phase transform that walks +`ast.meta` (even just to check a key) pays a slice of it. + +Concretely: + +| Doc size | Runs | `meta.attribution` cost | sidecar `Vec<AttributionRun>` | +|----------|-----:|------------------------:|------------------------------:| +| 5 KB | 50 | ~35 KB | ~2 KB | +| 50 KB | 500 | ~350 KB | ~20 KB | +| Book | 10K | ~7 MB | ~400 KB | + +The convention argument (Lua filter introspection of `meta.attribution`) +doesn't survive scrutiny either: +- The Lua-filter slot doesn't exist yet (bd-0fd0 is future). +- Any real Lua use case (e.g. "colour code blocks by author") needs + `lookup(start, end) → actor`, not raw runs — walking N runs per + node is O(N×M) and unusable. The right Lua surface, when bd-0fd0 + lands, is a `pandoc.attribution.lookup(...)` accessor backed by the + sidecar, not raw meta access. +- `identities` (the part users actually want to override) does stay + in meta as `meta.attribution.identities`, preserving the convention + exactly where it pays off. + +If a real consumer of `meta.attribution` materializes later, the +migration is small and purely additive: `AttributionGenerateTransform` +gains a `to_config_value()` and dual-writes meta alongside the sidecar. +No existing consumer breaks because the sidecar stays the source of +truth.
+ +### Future Lua-filter access (bd-0fd0) + +Today there is no Lua filter slot between generate and render in q2, +so this section is forward-looking: when bd-0fd0 (or whatever the +Lua-injection slot lands as) ships, the two attribution surfaces a +filter would legitimately want are accessible without any plan change. + +1. **Identities (read by Lua via `meta.attribution.identities`).** + User-authored identities already live in meta as a + `ConfigValue::Map` from YAML parse — accessible to any Lua filter + the moment bd-0fd0 exposes `meta` to filters, no different from + `meta.author` or `meta.toc`. This is the surface a "colour code + blocks by author" filter actually needs: actor → `(name, color)` + lookup. The *merged* identities (provider-supplied ∪ user override) + live on the sidecar; if a filter wants the merged set instead of + user input alone, the Lua host binds + `pandoc.attribution.identities()` to read + `ctx.attribution_data.identities`. That binding is a few lines + of host code, independent of this plan. + +2. **Per-node attribution lookup + (`pandoc.attribution.lookup(start, end) -> { actor, time } | nil`).** + The Lua-callable form of `AttributionSource::query_byte_range`, + backed by the sidecar's `AttributionMap`. The trait method exists + from Phase 1, so the bd-0fd0 binding is purely host-side + plumbing — no new Rust surface required. This is the *only* + useful API shape for raw runs: walking the run list per-node + from Lua would be O(N × M) and unusable, which is why exposing + the runs through `meta.attribution` as a `ConfigValue::Map` + would actively invite an anti-pattern. + +In short: the sidecar choice does not foreclose Lua access — it +narrows it to the access pattern that's actually performant. +`meta.attribution.identities` covers the conventional read-from-meta +case; `pandoc.attribution.lookup(...)` covers per-node queries via +a host binding to the sidecar. 
+ +If `quarto inspect` or `--keep-md` later needs to surface raw +attribution data for debugging, `AttributionGenerateTransform` grows +the additive `to_config_value()` dual-write described above — that +debug surface and the Lua-binding surface are independent and +either can ship without the other. + +## Phase 0 — Test plan (TDD, write first) + +> **DO NOT begin Phase 1 implementation until every test below is checked in, +> running, and red.** The CLAUDE.md TDD rule is non-negotiable. +> +> **Status: complete (commit `b2ee6e70`).** 46 Phase 0 tests checked in +> — 8 green as regression pins, 38 red against `unimplemented!()` +> bodies. Phase 1-6 implementers turn the red ones green incrementally. + +### Unit-test crates / files to create + +- [x] `crates/quarto-core/src/transforms/attribution_generate.rs` + (transform stub with `unimplemented!()` body; tests live in + `crates/quarto-core/tests/attribution_generate.rs`) +- [x] `crates/quarto-core/src/transforms/attribution_render.rs` + (transform stub; tests live in + `crates/quarto-core/tests/attribution_render.rs`) +- [x] `crates/quarto-core/src/attribution/` — module created with + `{mod, types, source, builder, prebuilt, git_blame, palette, mode}.rs`. + All real logic is `unimplemented!()` until the relevant Phase + lands; the surface compiles so tests reference real APIs. + +### Test cases (Phase 0 — all must be red) + +1. **WASM-transport JSON round-trip with interning preservation.** The + transport-only types `TransportAttributionRun` / + `TransportAttributionData` (plain `String` actor fields) serde- + round-trip through JSON unchanged in three configurations: runs-only + (`identities` field empty → key omitted via + `skip_serializing_if = "HashMap::is_empty"`), identities-only (`runs` + field empty → key omitted via the analogous `AttributionMap::is_empty`), + both populated. 
**Plus a stronger assertion:** `PreBuiltAttributionProvider` + takes a transport JSON string, decodes it via the transport types, + feeds the result through `AttributionDataBuilder`, and the resulting + canonical `AttributionData` satisfies `Arc::ptr_eq(run.actor, + identities.get_key_value(...))` for every actor that appears in + both runs and identities — i.e. the round-trip *restores* the + interning invariant that serde alone would have destroyed (each + `Arc::from(s)` during deserialize would otherwise allocate + per-occurrence). This is the transport contract at the + hub-client → WASM boundary (Phase 3b); JSON serde is NOT used for + inter-transform passing inside the pipeline (the sidecar carries + the typed struct directly). (Plain `serde_test` for the transport + round-trip; ad-hoc assertion via `PreBuiltAttributionProvider` for + the ptr_eq restoration. Lives in `attribution/types.rs` and + `attribution/prebuilt.rs`.) +2. **`Vec<AttributionRun>::query_byte_range`** returns the most-recent + `(actor, time)` overlapping a query range, mirroring the TS + `attribution-runs.test.ts` (on `feat/node-attribution`) invariants. + Cover: empty, single-run, non-overlapping, overlapping with two + distinct actors, query at boundary. +3. **Git-blame provider parses porcelain identically to TS reference.** + Re-use the fixtures in + `hub-client/src/services/attribution-gitblame.test.ts` (on + `feat/node-attribution`); capture them as **checked-in porcelain + text** under `tests/attribution_gitblame_fixtures/` so the Rust + tests don't depend on live commit timestamps. Cover the same + multi-byte UTF-8 cases (CJK, emoji) the TS tests do. +4. **`AttributionGenerateTransform` happy path** — given a fixture provider + returning an `AttributionData` with runs + `[{0..5, alice, t=1}, {5..10, bob, t=2}]` and an `identities` map + `{ "alice": ("Alice", "#ff0000") }`, the transform sets + `ctx.attribution_data = Some(Arc::new(AttributionData { ...
}))` + whose `runs.query_byte_range(0, 10)` returns `(bob, t=2)` and whose + `identities["alice"]` is `("Alice", "#ff0000")`. The actor `Arc<str>` + inside each run is *pointer-equal* to the corresponding key in + `identities` (pin the interning invariant explicitly via + `Arc::ptr_eq`). Off-path: when the provider returns empty + `identities`, `ctx.attribution_data.identities.is_empty()` holds. +5. **`AttributionGenerateTransform` skip conditions:** + - No provider in `RenderContext` → `ctx.attribution_data` remains + `None`, no diagnostic. + - `is_feature_disabled(meta, "attribution")` → skip; + `ctx.attribution_data` remains `None`. (User-authored + `meta.attribution.identities`, if any, stays in meta exactly as + written but is not consumed.) + - **Identities-only user override** (positive case, not a skip): + `meta.attribution.identities` populated in YAML and provider opted + in → run the provider, build `AttributionData` with the provider's + `runs`, and merge identities per the Phase 2 merge rule (user + entries win on key collision; non-colliding user keys are + dropped). Pin via three sub-assertions: + (a) a key present in both user YAML and the provider's + identities resolves to the user's `(name, color)` in + `ctx.attribution_data.identities`, *and* the merged map's key + for that actor is `Arc::ptr_eq` to the provider's original key + (i.e. `Arc<str>` provenance preserved so the `Arc::ptr_eq` + interning invariant from test #4 holds); + (b) a key present only in the provider survives the merge + unchanged; + (c) a key present only in user YAML does **not** appear in + `ctx.attribution_data.identities`. Sub-case (c) is the + regression guard against accidental re-introduction of the + dead-code path described in Phase 2. +6.
**`AttributionRenderTransform` for q2-debug (producer-invariant + violation handling).** Given an AST with two `Str` nodes whose + `SourceInfo`s point to ranges `0..5` and `5..10`, and a + `ctx.attribution_data` whose `identities` map has an entry for + `alice` (`name: "Alice"`, `color: "#ff0000"`) and **deliberately + no** entry for `bob` (an in-test producer-invariant violation + constructed to exercise the render-side warning path — see + Phase 6), the transform emits exactly one diagnostic warning + naming `bob` as the offending actor, and the JSON writer emits + two sibling fields nested inside `astContext` (not peer to it): + - `astContext.attributionActors` — `{ "alice": { name: "Alice", color: "#ff0000" }, + "bob": { name: "", color: "#888888" } }`. One entry per + distinct actor referenced by the attribution array; the `bob` + entry came through the warning-path placeholder. Identity is + resolved **once per actor** (interned), not once per record. + This test pins the warning-path behaviour; on happy paths + (every actor identity-mapped by the producer) no diagnostic + fires and no placeholder appears — see Phase 0 test #11 for a + happy-path q2-debug fixture. + - `astContext.attribution` — sparse array of length 2: + `[{ s: , actor: "alice", time: 1 }, + { s: , actor: "bob", time: 2 }]`. + Three fields per record (`s`, `actor`, `time`), always present. + Identity (`name`, `color`) is **not** duplicated per record — it + lives once per actor in `astContext.attributionActors`. + + **Off-path regression:** when no attribution is in scope, both + `astContext.attributionActors` and `astContext.attribution` keys are **absent** + (not present-but-empty), making the JSON byte-identical to today's + output — assert this explicitly. (See "q2-debug delivery", below, + for the full schema.) +7. 
**`AttributionRenderTransform` for HTML (producer-invariant + violation handling).** Given the same fixture as test #6 (alice + has an identity entry, bob is deliberately omitted to exercise + the warning path), the transform emits one diagnostic warning + naming `bob`, and the HTML body contains all four attribution + attributes on each wrapped node: + - First wrapping span (alice, identity-resolved): `data-attr-actor="alice" + data-attr-time="1" data-attr-name="Alice" data-attr-color="#ff0000"`. + - Second wrapping span (bob, warning-path placeholder): + `data-attr-actor="bob" data-attr-time="2" data-attr-name="" + data-attr-color="#888888"`. + + Cover **both** block-level wrappers (`write_block_source_attrs`) and + inline wrappers (`write_inline_source_attrs`) — extend the fixture + with at least one block-level node carrying author attribution so + the test pins both paths. The `data-attr-*` attributes appear on the + **outer attribution wrapper** (see test #7b for the wrapper layering + when source-locations is also on). The attributes only appear when + attribution is opted in; without `ctx.attribution_data` the body is + byte-identical to current output (regression guard), and **no + diagnostic warning is emitted** in the absence of an invariant + violation. + +7b. **HTML coalescing** — given three contiguous `Inline::Str` nodes + whose lookups all return the same `(actor, time)` tuple (e.g. one + author writing a paragraph), the writer emits **one** outer + attribution wrapper `<span data-attr-…>` covering all three + texts, not three. A fourth inline whose `(actor, time)` differs + starts a new outer wrapper; an inline with no attribution hit falls + outside both. When `include_source_locations` is also on, the + per-inline `data-sid`/`data-loc` spans become **inner** spans nested + inside the outer attribution wrapper: + + ``` + <span data-attr-actor="alice" data-attr-time="1" data-attr-name="Alice" data-attr-color="#ff0000"> + <span data-sid="…" data-loc="…">word1</span> + <span data-sid="…" data-loc="…">word2</span> + </span> + ``` + + When source-locations is off, text is written directly inside the + outer attribution span with no inner wrappers. 
Pins coalescing + semantics so a future writer refactor doesn't silently regress to + per-inline attribution wrapping. + +7c. **Attribution-on + source-locations-off composition.** Given the + same inline fixture as test #7 with `meta.include-source-locations` + set to `false` (or absent — same default), assert the rendered + HTML satisfies all three properties: + - **No `data-sid` or `data-loc` attributes anywhere** in the + output — not on the block opening tag, not on any inline span. + Grep-anti-assertion. + - **All four `data-attr-*` attributes present** on both the + block's opening tag (via the restructured + `write_block_source_attrs`) and the outer coalesced attribution + wrapper (via the coalescing pass). + - **Inner Str text inside the outer wrapper has no per-inline + `` wrapper** — text is emitted directly (the + `Inline::Str` handler's raw-text path at `html.rs:670` is + reached). + + This is the regression guard against re-coupling the two features. + A future refactor that, say, makes `write_attribution_attrs` + short-circuit through the existing `include_source_locations` + early-exit would fail this test loudly. Conversely, an + `attribution_render.rs` that "force on"s source-locations as a + side effect would fail the first sub-assertion (spurious `data-sid`). + +7d. **Structured inlines break prose coalescing.** Given a fixture + sequence `[Str("hello"), Code("world"), Str("foo")]` where all + three lookups return the same `(actor=alice, time=1)`, assert + the rendered HTML contains **three** attribution wrappers: + - one outer prose wrapper around `Str("hello")`, + - one own wrapper around the rendered `world`, + - one outer prose wrapper around `Str("foo")`. + + *Not* a single outer wrapper covering all three. Repeat the + pattern substituting `Inline::Emph`, `Inline::Link`, + `Inline::Span`, and `Inline::Math` for `Code` to pin that the + prose-only restriction (Phase 4b) applies symmetrically across + all structured inline variants. 
Regression guard against a + future refactor that "naturally" extends coalescing across + structured inlines and silently changes nesting semantics. +8. **`SourceInfo` chain resolution** — a node whose `SourceInfo` is + `Substring(parent=Original{0..20}, 5..10)` resolves to file 0, + bytes 5..10 *in the original file*, not 5..10 in the substring. This + already works for `map_offset` in `quarto-source-map/src/mapping.rs`; + the test pins it for the attribution lookup helper specifically, so a + future refactor can't silently regress it. + +8b. **`AttributionRenderTransform` skips non-primary-file nodes.** + Given an AST containing one node whose `SourceInfo` resolves to + `(file_id=0, 0..5)` (a hit on the primary doc's attribution map) + and a second node whose `SourceInfo` resolves to `(file_id=1, + 0..5)` — e.g. a node spliced in via `{{< include other.qmd >}}` + whose byte range happens to overlap a run in the primary doc's + `AttributionMap` — the resulting lookup vec has a record at the + first node's pool index and `None` at the second's. Pins the v1 + "primary doc only" invariant against the silent byte-range- + collision failure mode described in Open Question #2. The + fixture deliberately uses an overlapping byte range so that + *only* the `file_id` filter (not range absence) explains the + second node's `None`. +9. **End-to-end CLI test** — the test builds a temp git repo using + the deterministic-timestamp setup spelled out in Phase 3a § + Test fixtures (`tempdir` + `git init` + two scripted commits by + distinct authors with `GIT_AUTHOR_DATE` / `GIT_COMMITTER_DATE` / + `GIT_AUTHOR_EMAIL` / `GIT_COMMITTER_EMAIL` / + `GIT_AUTHOR_NAME` / `GIT_COMMITTER_NAME` pinned), copies + `tests/fixtures/attribution-blame/doc.qmd` into the tempdir, then + runs `cargo run --bin quarto -- render /doc.qmd --to + html --attribution=git`. Asserts the produced HTML contains + `data-attr-actor=""` strings matching the two scripted + author emails. 
Per CLAUDE.md the plan must include this end-to-end + test for any CLI-visible feature; no claiming "done" without + inspecting the rendered HTML. + +9b. **CLI/YAML mode resolution — three-state matrix.** Unit test on the + pure resolution function that takes + `(cli: Option<AttributionMode>, yaml: Option<AttributionMode>) -> + Option<AttributionMode>` (Phase 3c). Pin every combination so + "silent override on CLI/YAML conflict" can't regress: + - `(None, None)` → `None`. (Unflagged default.) + - `(None, Some(Off))` → `Some(Off)`. (YAML opts out.) + - `(None, Some(Git))` → `Some(Git)`. (YAML opts in.) + - `(Some(Off), None)` → `Some(Off)`. (CLI escape hatch.) + - `(Some(Off), Some(Off))` → `Some(Off)`. (Trivial agreement.) + - **`(Some(Off), Some(Git))` → `Some(Off)`. CLI wins on conflict + — the escape-hatch path. This is the regression guard the prior + review specifically called out.** + - `(Some(Git), None)` → `Some(Git)`. (CLI opts in standalone.) + - `(Some(Git), Some(Off))` → `Some(Git)`. (CLI overrides YAML + opt-out — symmetrical to the escape-hatch case.) + - `(Some(Git), Some(Git))` → `Some(Git)`. (Trivial agreement.) + + Then a small integration assertion: when the resolved mode is + `Some(Off)` or `None`, the `RenderContext` constructed by + `render_document_to_file` has `ctx.attribution_provider.is_none()` + — the CLI plumbing must not install a `GitBlameProvider` for + either case. (Pure unit test on the resolution function plus one + integration test on the `RenderContext` construction; lives next + to the `RenderToFileOptions` → `RenderContext` plumbing site + introduced in Phase 3c.) +10. **WASM byte-identicality fixture sweep.** For every existing + q2-debug fixture (the corpus that today drives + `parse_qmd_to_ast`), assert that + `parse_qmd_to_ast_with_attribution(content, None)` produces output + byte-identical to `parse_qmd_to_ast(content)`. This is the + structural test that backs the Phase 3b byte-identicality + invariant: the `None` branch must never silently alter the + existing q2-debug surface, since the latter delegates to the + former. 
Runs as a parameterised test over the fixture corpus, not + a single point assertion. +11. **q2-debug attribution-on, happy path (every actor identity-mapped).** + Given a small qmd fixture with two contiguous Str nodes whose + `SourceInfo`s span the byte ranges in an `AttributionData` + constructed in-test, where **every actor referenced in `runs` has + an entry in `identities`** (satisfying the Phase 6 producer + invariant), invoke the q2-debug path + (`parse_qmd_to_ast_with_attribution(content, Some(json))`) and + assert the resulting `astContext.attribution` array and + `astContext.attributionActors` table match the expected sparse records. + Crucially: assert that the `astContext.attributionActors` entries come from + the in-test `identities` (not the warning-path placeholder) and + that **no diagnostic warnings are emitted** — this is the + happy-path counterpart to test #6's invariant-violation case, so + the warning code path is *not* exercised here. Distinct fixture + from test #6 — see Phase 4d for why the two invocation paths + cannot share a fixture. +12. **GitBlameProvider producer-invariant.** Given two `tests/fixtures/` + git porcelain captures (one two-author, one N-author), assert that + the `AttributionData` returned by `GitBlameProvider::build(...)` + satisfies: every actor referenced by `runs` has an entry in + `identities`, and each entry's `display_name` equals the + mail-local-part and `color` equals `actor_color(fnv1a_hex8(email))`. + Pin the deterministic colour for at least one known email + (e.g. `alice@example.com → hsl(, 60%, 50%)`) so a future + refactor of `fnv1a_hex8` can't silently shift hues. + +### Snapshot tests + +- [ ] `crates/quarto-core/snapshots/attribution_generate__*` — one per + skip condition + one happy path. **Deferred to Phase 2/4c**: + while the generate transform body is `unimplemented!()`, snapshot + output would be a panic. 
Phase 0 covers this surface via structured + assertions on `ctx.attribution_data` and `ctx.format_options` + (tests #4–#7 in `attribution_generate.rs` / `attribution_render.rs`). +- [x] HTML off-path baseline snapshot at + `crates/quarto-core/tests/snapshots/attribution_baseline_snapshot__attribution_off_baseline.snap`. + Asserts a small attribution-free document renders to the same HTML + body it does today; GREEN immediately. Backs the Phase 4 + "byte-identical when off" promise as a mechanical regression guard. + The plan's original two-file split + (`attribution_render_html__off` + `_on`) is unnecessary: the off + baseline is the one that must never drift; the on case is exercised + by Phase 4b's coalescing tests via structured DOM assertions. +- [ ] No snapshot test for the q2-debug JSON: it'll churn whenever AST IDs + change. The structured assertion in **Phase 0 test #6** is the + substitute: when `attribution_lookup` is `None`, both the + `astContext.attribution` and `astContext.attributionActors` keys are absent + from the output. Combined with the + `#[serde(skip_serializing_if = …)]` annotations pinned in Phase 4a + (`Vec::is_empty` on `attribution`, `HashMap::is_empty` on + `attribution_actors`), "keys absent when off" is mathematically + equivalent to "JSON byte-identical to today's output" — serde + skips both fields, no other code path changes — so a snapshot + would be redundant. + +## Phase 1 — Canonical types and provider trait + +- [x] Create `crates/quarto-core/src/attribution/mod.rs` with: + - `AttributionRun { start: usize, end: usize, actor: Arc, time: i64 }` + — the canonical in-memory shape. `actor` is `Arc` (not + `String`) — shared across all runs by the same author; see + Vocabulary for the rationale. 
**`Serialize` only**, no + `Deserialize` derive: deserialization goes through the + transport types below, then through `AttributionDataBuilder`, + so the interning invariant is restored on the way back in (a + plain `Deserialize for Arc<str>` would re-allocate per-occurrence + and silently regress the memory cost claimed in Vocabulary). + - `AttributionMap` as a transparent newtype around `Vec<AttributionRun>` + (`#[serde(transparent)]` so the JSON form is a flat array). The + in-memory queryable form. No file keying in v1. Provides an + `is_empty(&self) -> bool` helper for `skip_serializing_if`. + Same `Serialize`-only treatment as `AttributionRun`. + - `IdentityMap = HashMap<Arc<str>, Identity>` (keyed by the same + `Arc<str>` instances used in `AttributionRun.actor`) where + `Identity { display_name: String, color: String }` + serde derives. + - `AttributionData { runs: AttributionMap, identities: IdentityMap }` + + `Serialize` derive (no `Deserialize`; see above). **The canonical + in-memory shape**, held as `Arc<AttributionData>` on + `RenderContext.attribution_data` (the sidecar). Not stored in + `ast.meta`. `Serialize` exists *solely* for the WASM transport + boundary (Phase 3b); both fields use + `#[serde(default, skip_serializing_if = "…is_empty")]` so runs-only + and identities-only transport payloads serialize compactly. + - **Transport-only mirror types** for the JSON deserialize path: + `TransportAttributionRun { start: usize, end: usize, actor: String, + time: i64 }` and `TransportAttributionData { runs: + Vec<TransportAttributionRun>, identities: HashMap<String, Identity> }`, both with `Serialize + Deserialize`. The wire shape + is identical to the canonical types' `Serialize` form (`Arc<str>` + and `String` both serialize as JSON strings), so round-tripping + canonical → JSON → transport → builder → canonical preserves data; + the only thing the transport detour buys is a clean place to + re-intern. + - **`AttributionDataBuilder`** — the single entrypoint every + producer uses to construct an `AttributionData`. 
Owns an internal + `HashMap<String, Arc<str>>` intern map; exposes: + - `fn intern_actor(&mut self, actor: &str) -> Arc<str>` — returns + the canonical `Arc<str>` for `actor`, allocating once on first + sight and `Arc::clone`-ing thereafter. + - `fn push_run(&mut self, start: usize, end: usize, actor: Arc<str>, + time: i64)` — actor argument *must* be the value returned by + `intern_actor` (enforced by convention, not the type system — + document this in the doc-comment). + - `fn set_identity(&mut self, actor: Arc<str>, id: Identity)`. + - `fn build(self) -> AttributionData`. + + Doc-comment must state the invariant the builder enforces: "Every + `AttributionRun.actor` in the built `AttributionData` is + `Arc::ptr_eq` to the corresponding key in `IdentityMap` by + construction." All three callsites (the two providers and test + fixtures) go through this builder; no producer should ever + construct `AttributionRun` literals with ad-hoc `Arc::from(s)`. + - `pub trait AttributionSourceProvider: Send + Sync` with a single + method `fn build(&self, ctx: &RenderContext) -> Result<AttributionData>`. + **The method is sync, not async.** Locked-in rationale: + - The only blocking implementor is `GitBlameProvider`, which + spawns one `git blame --porcelain` subprocess (~tens of ms on + typical document-sized files, long-tail ~1s on very large + repos). v1's native render is single-document-at-a-time, so + the calling thread has no other work to compete with. + - The WASM implementor (`PreBuiltAttributionProvider`) is purely + sync (JSON parse + intern loop, sub-millisecond). An async + signature would force it through a degenerate `async fn` body + containing zero `.await`s — a real code smell. + - A future caller that needs cooperative scheduling can wrap the + sync `build` in `tokio::task::spawn_blocking` at the call site + without touching the trait. The reverse (async trait, sync + caller via `block_on`) is uglier and runtime-specific. + + Doc-comment on the method must state: "May block. 
Implementations + that spawn subprocesses or do other blocking I/O should document + expected latency. Currently: `GitBlameProvider` blocks on a + `git blame --porcelain` subprocess (tens of ms typical, ~1s on + huge repos); `PreBuiltAttributionProvider` is non-blocking." + + Each provider returns the data shape that's natural for it + (`GitBlameProvider` returns runs + synthesized identities in v1; + `PreBuiltAttributionProvider` returns whatever hub-client shipped, + re-interned). Both providers route construction through + `AttributionDataBuilder`. + - `pub trait AttributionSource: Send + Sync` with + `fn query_byte_range(&self, start: usize, end: usize) -> Option<…>`. + No `file_id` parameter — single-doc invariant. (v2 extension noted in + Open Question #2.) + - Concrete impl: `impl AttributionSource for AttributionMap` via binary + search over the runs. +- [x] Add **two new fields** to `RenderContext` + (`crates/quarto-core/src/render.rs:84-188` as of #169, which added + `resolved_listings` at line 187). Defaults are `None`; nothing in the + existing pipeline should observe a behavior change. Both fields + carry single-writer / single-reader doc-comments so the entire + Finalization Phase (which runs between Generate and Render with + `attribution_data` populated) is forced to treat the slot as + opaque. + - `pub attribution_provider: Option<Arc<dyn AttributionSourceProvider>>` + — opt-in signal set by the CLI flag plumbing (Phase 3c) or the + WASM entry point (Phase 3b). Read by `AttributionGenerateTransform` + only. Doc-comment: "Set by the CLI flag plumbing (Phase 3c) or + the WASM entry point (Phase 3b). Read by + `AttributionGenerateTransform`. No other transform should + consult this field." + - `pub attribution_data: Option<Arc<AttributionData>>` — the + sidecar carrying the canonical merged form. Written by + `AttributionGenerateTransform`; read by `AttributionRenderTransform`. + No other transform reads or writes this field. 
Doc-comment: + "Written by `AttributionGenerateTransform`; read by + `AttributionRenderTransform`. **No other transform reads or + writes this field.** The entire Finalization Phase runs between + Generate and Render with this slot populated; future + Finalization transforms must treat it as opaque." + `Arc` so the value travels between transforms (and into the writer + config) without re-copying. +- [x] `pub fn format_supports_attribution(format: &Format) -> bool` — + returns `true` for formats whose writers consume the lookup (HTML and + q2-debug JSON in v1) and `false` otherwise (PDF, Typst, plain Pandoc + native, etc.). Used by `AttributionGenerateTransform`'s skip ladder + to short-circuit before invoking the provider; opting in to + attribution on a non-consuming format would otherwise fire a + `git blame` subprocess whose output goes nowhere visible. +- [x] Small helper `from_config_value(meta: &ConfigValue) -> IdentityMap` + to read user-authored `meta.attribution.identities` (a small + `ConfigValue::Map` from YAML parse) into an `IdentityMap` for the + merge step in Phase 2. This is the *only* attribution-related + `ConfigValue` ↔ Rust-struct converter the plan ships; the bulk + `runs` path never visits `ConfigValue`. + +**Why a sidecar field on `RenderContext` rather than `meta.attribution`:** +see the "Why a sidecar, not `meta.attribution`" subsection at the end of +Vocabulary. Short version: `ConfigValue::Map` representation of `AttributionRun` +records is ~20× heavier per run than the typed struct, and the +hypothetical Lua-filter introspection it would enable wouldn't be useful +in practice (raw runs aren't a queryable shape; the right Lua surface is +a `lookup(start, end)` accessor when bd-0fd0 lands). User-authored +`meta.attribution.identities` still rides the convention. + +## Phase 2 — `AttributionGenerateTransform` + +- [ ] New file `crates/quarto-core/src/transforms/attribution_generate.rs` + modelled on `navbar_generate.rs`. 
(The 94-line size cited in an earlier + draft is now stale — `navbar_generate.rs` has grown to ~414 lines with + project-index enrichment; attribution-generate has no equivalent + enrichment step, so target the original "small + tests" footprint, not + the current navbar size.) +- [ ] Skip / merge ladder, in this order. User-authored runs aren't a + valid surface (users do not hand-author thousands of byte-range + tuples in YAML); the only legitimate user override is identities, + which the merge step in rule 4 handles: + 1. `!format_supports_attribution(&ctx.format)` → bail. The current + format's writer doesn't consume the lookup (PDF, Typst, plain + Pandoc native, etc.), so running the provider would do nothing + visible — and on the git-blame branch would needlessly spawn a + subprocess. Checked first so `attribution: git`-style project YAML + doesn't pay the cost on non-HTML targets. + 2. `is_feature_disabled(&ast.meta, "attribution")` (affirmative + `false`) → bail; `ctx.attribution_data` remains `None`. + User-authored `meta.attribution.identities`, if any, stays in + meta as written but is not consumed. + 3. `ctx.attribution_provider.is_none()` (no opted-in source) → bail. + 4. Otherwise (provider opted in): call + `provider.build(ctx)?` → `AttributionData` (the provider has + already routed construction through `AttributionDataBuilder`, + so its `runs[i].actor` Arcs are `ptr_eq` to its `identities` + keys). Then **merge with any user-supplied + `meta.attribution.identities`** (read via `from_config_value` + helper from Phase 1): take the provider's `runs` as-is; for + `identities`, on key collision **preserve the provider's + `Arc` as the map key and overwrite only the `Identity` + value** with the user's. Non-colliding user keys (an actor + named in YAML but absent from the provider's runs) are + **dropped, not unioned** — see "Why drop non-colliding user + keys" below. 
Store as + `ctx.attribution_data = Some(Arc::new(AttributionData { runs, identities: merged }))`. + + **Why preserve provider keys on collision.** The user's + `IdentityMap` from `from_config_value` was built from a + `ConfigValue::Map` and its keys are fresh `Arc` allocations + unrelated to any `AttributionRun.actor`. If those keys *replaced* + the provider's keys on collision, every `AttributionRun` for + that actor would point at a different `Arc` than the map + key, breaking the `Arc::ptr_eq` interning invariant pinned in + Phase 0 test #4. Replacing only the value preserves the + invariant — `HashMap::insert` returns the old value but keeps + the existing key, which is exactly what we need; concretely, + `if let Some(slot) = merged.get_mut(&user_key) { *slot = user_id; }` + (no `else` branch — non-colliding entries are not inserted). + + **Why drop non-colliding user keys.** The render walk + (Phase 4c) prunes `attribution_actors` to actors referenced by + `attribution_lookup` — i.e. actors with at least one run in + *this* document. A user-supplied identity for an actor with no + runs is therefore invisible at the writer, and inserting it + into the merged map is dead work. If a future v2 feature wants + cross-doc aggregation (a project sidebar listing all + contributors a project knows about), the right home for "people + the project knows" is a project-level identities table built in + the `ProjectIndex`, not a per-doc sidecar; v2 introduces it + separately and the v1 merge stays minimal so its surface + doesn't have to change under that work. +- [ ] Register the stage in `pipeline.rs` as the **last entry in the + Navigation Phase**, immediately **after** `FooterRenderTransform` + (currently line 847) and immediately **before** the Finalization Phase + begins with `LinkRewriteTransform` (currently line 857). 
Rationale: + (a) `AttributionGenerateTransform` is a `*-generate` stage and reads + naturally with the other generate transforms; (b) end-of-phase + placement puts it *after* all the website / navigation transforms + rather than interleaved with them, so it doesn't sit "in the middle" + of unrelated stages; (c) end-of-Navigation-Phase gives a stable + insertion contract: any new navigation transform added later goes + **before** this stage by the same rule, so the position never has to + be re-litigated. The historical alternative (slotting between + `FooterGenerateTransform` and `ListingGenerateTransform` — inside the + phase rather than at its tail) was rejected — see the design + discussion in the Overview. + +## Phase 3 — Provider implementations + +### 3a. Git-blame provider (native) + +**Implementation choice: shell out to the `git` binary.** Rejected +alternatives: + +- `git2` (libgit2 C bindings) — drags libgit2 into the Rust workspace + (cross-compilation pain on Windows MSVC and musl), blame doesn't always + match real `git blame` (mailmap handling, `--follow`, whitespace + heuristics), doesn't ride the user's gitconfig. +- `gix` (gitoxide) — pure Rust but blame is one of its newer subsystems + with edge cases, large crate fan-out, still doesn't match real git in + every corner. + +Shelling out keeps zero new build deps, matches the TS prototype line-for-line, +honours the user's gitconfig / `.mailmap` / `core.autocrlf`, and adds nothing to +the WASM build (which doesn't need git). Subprocess overhead (~tens of ms per +file) is fine for `quarto render`; if project-wide rebuild scaling ever +matters, revisit then. 
+ +- [ ] Extend `BinaryDependencies` (`crates/quarto-core/src/render.rs:32-44`) + with a `pub git: Option<PathBuf>` field, and add the matching lookup + inside `BinaryDependencies::discover` (`pub fn discover` is at line 53; + the body assembles fields starting at line 54): + ```rust + git: runtime.find_binary("git", "QUARTO_GIT"), + ``` + This is the single corrective for review issue #1; without it the plan's + earlier claim that "binaries owns git discovery already" is false. +- [ ] `crates/quarto-core/src/attribution/git_blame.rs`: + - Pure-Rust port of `attribution-gitblame.ts`. Spawns + `git blame --porcelain` using `ctx.binaries.git` (now populated by the + item above). The provider does **not** invoke `git` from `$PATH` + directly — always go through `BinaryDependencies` so `QUARTO_GIT` + overrides work the same way as `QUARTO_PANDOC` etc. + - Multi-byte UTF-8 line lengths via `s.len()` (`str::len` already + returns the UTF-8 byte count, not the char count) — TextEncoder + equivalent. + - Returns a **complete** `AttributionData { runs, identities }` for + the current document, constructed via `AttributionDataBuilder` + (Phase 1) — never by literal struct construction with ad-hoc + `Arc::from(s)` calls. Concretely: the parser maintains a single + `AttributionDataBuilder`; on each commit-header block it calls + `builder.intern_actor(&email)` once to obtain the canonical + `Arc`, then `builder.set_identity(actor.clone(), ...)`; on + each content line it calls `builder.push_run(start, end, + actor.clone(), time)` re-using that same `Arc`. The same + email seen on N content lines produces N `Arc::clone` calls and + one underlying allocation. `runs` is the parsed porcelain output + for the primary file in v1. 
`identities` contains one entry per + distinct `author-mail` seen in the porcelain stream, satisfying + the Phase 6 producer invariant (every actor referenced in `runs` + has an entry in `identities`): + - `display_name = email.split_once('@').map(|(local, _)| local).unwrap_or(email)` + (the mail-local-part; falls back to the full string for + pathological emails without `@`). + - `color = actor_color(fnv1a_hex8(email))`. The email is + pre-hashed because `actor_color` parses the first 6 hex chars + of its input — an email like `charlie.gao@posit.co` would + yield colour collisions across every author whose email shares + a hex-prefix-friendly leading run. FNV-1a (defined in + `palette.rs`, see Phase 6) produces a uniformly-distributed + 8-char hex string, ensuring per-email hue distribution. + Synthesizing identities in the producer (rather than detecting + email-shaped actors at render time) keeps the render stage + source-agnostic — see Phase 6 § Identity resolution. + - **Graceful degradation, not a render failure.** When + `--attribution=git` is passed but (a) `ctx.binaries.git` is `None` + (git not on PATH / `QUARTO_GIT` unset) or (b) the document isn't + inside a git working tree, emit a `DiagnosticMessage` warning and + return an empty `AttributionData` (`runs = []`, `identities = {}`). + The pipeline then behaves as if + attribution were off — the render succeeds, just without + `data-attr-*` attributes. Rationale: a missing git binary should be + a soft signal, not a broken build. + - Test fixtures: two independent paths, deliberately not sharing a + real `.git/` directory. + - **Parsing tests** (Phase 0 #3): checked-in `git blame + --porcelain` text under `tests/fixtures/attribution-blame/` so + the parser unit tests don't depend on live timestamps. 
The + porcelain text was captured once from a hand-built repo and + committed verbatim; regenerating it later requires re-running + the same capture (a one-line shell command documented in + `tests/fixtures/attribution-blame/REGEN.md` so a future + maintainer can refresh it without spelunking). + - **End-to-end CLI test** (Phase 0 #9): the test itself builds + a temp git repo on every invocation. `tempdir` + `git init` + + two scripted commits by distinct authors, all with + `GIT_AUTHOR_DATE` / `GIT_COMMITTER_DATE` / + `GIT_AUTHOR_EMAIL` / `GIT_COMMITTER_EMAIL` / + `GIT_AUTHOR_NAME` / `GIT_COMMITTER_NAME` pinned to fixed + values so commit hashes and porcelain output are + bit-deterministic across runs and across machines. The + source `.qmd` lives under `tests/fixtures/attribution-blame/` + and is copied into the tempdir at test start; nothing under + `.git/` is committed to the working tree. This keeps the + repo free of binary blobs (no committed `.git/` dirs, no + tarballs) and avoids the git-inside-git tooling surprise of + a nested working tree. + +### 3b. Automerge runs provider (WASM) + +- [ ] `crates/wasm-quarto-hub-client/src/attribution.rs` (a WASM-only module). + Hub-client computes the runs in JS today. Two delivery options: + + **Option A (committed for v1): hub-client preserves its TS replay + code, ships a serialized `AttributionData` (i.e. + `{ runs: [...], identities: {...} }`) across the WASM boundary as + a JSON string parameter to `parse_qmd_to_ast_with_attribution`.** The Rust side wraps the + string in a `PreBuiltAttributionProvider` and stores it on + `RenderContext.attribution_provider`; the actual JSON parse + + interning happens inside the provider's `build()`, not at the + WASM entry point — keeping construction in one place (Phase 1's + `AttributionDataBuilder`) instead of two. 
+ + Full definition (lives at + `crates/quarto-core/src/attribution/prebuilt.rs` — see "Where + `PreBuiltAttributionProvider` lives" below): + + ```rust + /// Wraps a hub-client-supplied transport JSON string and decodes + /// it on demand into a canonical `AttributionData`. + /// + /// The JSON is parsed lazily inside [`build`] rather than at + /// construction time so that: + /// - construction is infallible (no `Result` at the WASM entry + /// point), and + /// - the parse + intern step lives behind the same + /// `AttributionSourceProvider` trait surface as + /// `GitBlameProvider`, so a future caller cannot distinguish + /// the two by where errors surface. + pub struct PreBuiltAttributionProvider { + json: String, + } + + impl PreBuiltAttributionProvider { + pub fn new(json: String) -> Self { + Self { json } + } + } + + impl AttributionSourceProvider for PreBuiltAttributionProvider { + fn build(&self, _ctx: &RenderContext) -> Result<AttributionData> { + let raw: TransportAttributionData = + serde_json::from_str(&self.json) + .map_err(|e| /* wrap into the project's error type */)?; + let mut b = AttributionDataBuilder::new(); + // identities first so the intern map sees provider-supplied + // actor strings before any runs that reference them + for (k, id) in raw.identities { + let actor = b.intern_actor(&k); + b.set_identity(actor, id); + } + for r in raw.runs { + let actor = b.intern_actor(&r.actor); + b.push_run(r.start, r.end, actor, r.time); + } + Ok(b.build()) + } + } + ``` + + The impl is plain (no `#[async_trait]` attribute) because the + trait's `build` method is sync — see Phase 1 § trait definition + for the locked-in rationale. The `_ctx: &RenderContext` parameter + is unused for this provider but must match the trait signature — + the canonical form is fully determined by the transport JSON, so + `RenderContext` carries no useful signal for the prebuilt path. + + This is the *only* path that crosses the transport boundary. 
The + builder restores the interning invariant that serde's default + `Deserialize for Arc<str>` would have destroyed (each occurrence + of the same actor string in the JSON would otherwise allocate a + fresh `Arc<str>`), so `PreBuiltAttributionProvider` ends up structurally + indistinguishable from `GitBlameProvider` for downstream consumers. + Phase 0 test #1 strengthens the round-trip assertion to pin this + explicitly. The JSON payload is *transport-only* — once parsed it + lives as a typed Rust struct on the sidecar + (`ctx.attribution_data`), never visiting `ast.meta`. Pros: no + automerge-rs in the WASM bundle (~hundreds of KB saved), no + duplicate replay implementations, runs and identities ride one + channel. Cons: the canonical form is computed in TS, not Rust — + but the transport JSON is decoded into the same typed shape any + provider produces, so this is fine. + + **Where `PreBuiltAttributionProvider` lives.** In + `crates/quarto-core/src/attribution/prebuilt.rs`, not in + `wasm-quarto-hub-client`. It depends only on `AttributionData`, + `AttributionDataBuilder`, and `serde_json` (all already in + `quarto-core`), and has no WASM-specific code. Keeping it in + `quarto-core` lets the producer-invariant tests (Phase 0 test #1 + ptr_eq restoration, Phase 0 test #12 fixture sweep) run as + native unit tests on the canonical types' home crate, and lets + any future native caller that has a pre-built JSON payload (e.g. + `--attribution-from-file=…`) use it without reaching into the + WASM crate. + + **Direct-invocation flow (not via the transform pipeline registration).** + The Rust-side `pipeline::parse_qmd_to_ast` runs only three stages + today (`ParseDocumentStage` → `EngineExecutionStage` → + `MetadataMergeStage`) — `AstTransformsStage` is **not** in that + list.
So unlike the HTML path, the q2-debug WASM path cannot pick + up attribution transforms via the `build_transform_pipeline` + registration (Phase 2's end-of-Navigation-Phase insertion and + Phase 4's end-of-Finalization-Phase insertion). The WASM entry + point invokes them **directly** after the existing 3-stage parse: + + ``` + 1. If attribution_json is Some(s): install + PreBuiltAttributionProvider::new(s) on ctx.attribution_provider. + Else: leave ctx.attribution_provider as None. + (The JSON is NOT parsed at this point — the provider holds the + raw string and parses+interns lazily inside build(); see Option A + above.) + 2. Run pipeline::parse_qmd_to_ast(content, …, &mut ctx, runtime) + → AstOutput (unchanged from today). + 3. If ctx.attribution_provider.is_some(): + AttributionGenerateTransform::new() + .transform(&mut output.ast, &mut ctx).await?; + // ↑ this is where provider.build() runs — JSON parse + intern + AttributionRenderTransform::new() + .transform(&mut output.ast, &mut ctx).await?; + Else: skip — output is identical to today's parse_qmd_to_ast. + 4. Build JsonConfig with attribution_lookup / attribution_actors + pulled from ctx.format_options.json (Phase 4c populates these). + 5. Serialize via pampa::writers::json::write_with_config. + ``` + + This deliberately diverges from the HTML path's + `build_transform_pipeline` registration to preserve the existing + q2-debug surface ("what did the parser see"): we do *not* slot + the full `AstTransformsStage` into `parse_qmd_to_ast`, because + doing so would suddenly fire callout/navbar/sectionize/etc. on + the AST debug view — a substantial behavior change unrelated to + attribution. + + Rejected alternatives: + - Adding `AstTransformsStage` to `parse_qmd_to_ast`'s stage list — + behavior change as above. 
+ - Building a new `AstTransformsStage::attribution_only()` + constructor — adds API surface to the stage just for this case; + direct invocation needs none of the StageContext bridging + `AstTransformsStage` provides (see Invariant below). + + **Option B (deferred to v2 consideration): link `automerge-rs` + into wasm-quarto-hub-client and replay history in Rust.** Pro: + one source of truth. Con: bundle size and a duplicate of + perfectly-good TS code. **v1 commits to Option A unconditionally** + — see Open Questions § #3 for the locked-in rationale. + +- [ ] Document Option A's parameter in + `crates/wasm-quarto-hub-client/CLAUDE.md`. **The q2-debug entry point + is `parse_qmd_to_ast` (`crates/wasm-quarto-hub-client/src/lib.rs:855`, + signature `pub async fn parse_qmd_to_ast(content: &str) -> String`), + not `render_qmd` (line 1005).** `render_qmd` is the HTML preview path, + which Phase 5 explicitly puts out of scope for v1 ("hub-client's HTML + preview tab does not display attribution"). New entry point signature: + ```rust + pub async fn parse_qmd_to_ast_with_attribution( + content: &str, + attribution_json: Option<String>, // JSON-encoded { runs, identities } + ) -> String + ``` + Note the **content-based** signature (matching the existing + `parse_qmd_to_ast`), not path-based. No `user_grammars` parameter — + `parse_qmd_to_ast` doesn't take one today; if grammar support is + needed later, both functions add the parameter together. Existing + `parse_qmd_to_ast` keeps its current signature; the with-attribution + variant is opt-in to keep the diff small. **`parse_qmd_to_ast` + becomes a thin wrapper that calls + `parse_qmd_to_ast_with_attribution(content, None)` and returns its + result directly** — no additional cfg branches, no + separately-collected diagnostics, no extra trace events, no + difference in error reporting, no pre- or post-processing of any + kind.
The body is one line plus a doc-comment ("`Equivalent to + parse_qmd_to_ast_with_attribution(content, None)`. Kept as a + separate entry point for callers that have no attribution to ship + and want the simpler signature."). This is what makes the + byte-identicality invariant below mechanical rather than + aspirational; a future maintainer adding a side effect to + `parse_qmd_to_ast` directly (instead of to the underlying + `parse_qmd_to_ast_with_attribution`) would silently break the + invariant, so the wrapper shape is the contract. + + **Byte-identicality invariant:** `parse_qmd_to_ast(content)` is + byte-identical to `parse_qmd_to_ast_with_attribution(content, None)` + for every fixture. The wrapper-with-no-extra-side-effects shape + above makes this true by construction: when `attribution_json` is + `None`, the provider isn't installed, step 3 in the recipe above + is skipped, and the resulting bytes come from the same 3-stage + pipeline + JSON serializer as today. The delegation is therefore + safe — every existing q2-debug render silently routes through the + new function, and a regression on the `None` branch would break + *all* renders, not just attribution ones. Phase 0 test #6 doubles + down on this with a structured assertion ("`astContext.attribution` + and `astContext.attributionActors` keys absent when off"), and + Phase 0 test #10 asserts byte-identicality across the existing + q2-debug snapshot corpus. + + **Transform-invocation invariant:** `AttributionGenerateTransform` + and `AttributionRenderTransform` MUST depend on `RenderContext` + only, never on `StageContext`. The HTML path drives them through + `AstTransformsStage`, which bridges `StageContext ↔ RenderContext` + (diagnostics, resource_report, project_index, …); the q2-debug + path calls them directly outside any `Pipeline`, with no bridge. + Both invocation paths must produce identical results for the same + inputs. 
Pin this in the file-level doc-comment of each transform + ("Reads/writes only fields on `RenderContext`; no `StageContext` + access") so a future refactor that reaches for `stage_ctx.foo` + fails the q2-debug path immediately. + + **Phase 9 entry point (`render_page_in_project`, line 1292) is + unchanged:** it drives the project-aware HTML preview path, which is + out of scope for v1. Attribution does not flow through it; the + `--attribution=git` CLI flag is the only HTML-preview path that ships + in v1, and it goes through the native CLI binary, not the WASM + entry points. + +### 3c. CLI flag plumbing (native) + +- [ ] Define a typed mode enum (lives in `quarto-core`, alongside the + attribution module so YAML and CLI both depend on the same type): + ```rust + #[derive(Debug, Clone, Copy, PartialEq, Eq, clap::ValueEnum, + serde::Serialize, serde::Deserialize)] + #[value(rename_all = "kebab-case")] + #[serde(rename_all = "kebab-case")] + pub enum AttributionMode { + Off, + Git, + } + ``` + Reject `Option<String>` — clap's `ValueEnum` gives typed parsing, + validated values, and auto-generated help listing alternatives. +- [ ] Extend `RenderArgs` in + `crates/quarto/src/commands/render.rs:40-67` with: + ```rust + pub attribution: Option<AttributionMode>, + ``` + Three CLI states, all distinct: + - flag absent → `None`, defer to YAML + - `--attribution=git` → `Some(AttributionMode::Git)`, force git + (overrides YAML) + - `--attribution=off` → `Some(AttributionMode::Off)`, force off + (escape hatch when project YAML has `attribution: git`) +- [ ] Plumb through `RenderToFileOptions` + (`crates/quarto-core/src/render_to_file.rs:83-111`) and into the + construction of `RenderContext`. Resolution order (matches how `--to` + and `format:` reconcile today): CLI value (if `Some`) wins; otherwise + YAML value; otherwise off. When the resolved value is + `AttributionMode::Git`, install `Arc::new(GitBlameProvider::new())` as + `ctx.attribution_provider`.
When `Off`, leave it `None` — same code + path as the unflagged default. Silent override on YAML/CLI conflict + (no diagnostic); the Phase 3a graceful-degradation path handles the + "git mode requested but git is unusable" case regardless of how the + mode arrived. +- [ ] YAML alternative: top-level `attribution:` in the document or + project YAML accepts the same three states. + - `attribution: git` → `Some(AttributionMode::Git)` + - `attribution: off` *or* `attribution: false` → `Some(AttributionMode::Off)` + - key absent → `None` + + **Not** valid as a CLI or YAML value: `automerge`. Automerge attribution + is hub-client-only and reaches the pipeline through the + `parse_qmd_to_ast_with_attribution` WASM entry point (Phase 3b), which + never consults `AttributionMode`. Keeping `automerge` out of this enum + prevents the CLI from advertising a capability it doesn't have. + +## Phase 4 — `AttributionRenderTransform` + +This is the heart of the format-specialisation contract. The transform reads +`ctx.attribution_data` once and produces output that the downstream writer +for the *current `Format`* can act on. Today that's two writers: + +### 4a. q2-debug delivery + +- [ ] q2-debug is a pseudo-format that aliases to `html` for the body writer + (`crates/quarto-core/src/format.rs:108`). The hub-client AST renderer + consumes the JSON output of `parse_qmd_to_ast`. So for q2-debug the + delivery shape is: + - Resolve every node's `SourceInfo` to a `(file_id, start, end)` byte + range using the existing chain-resolution logic in + `quarto-source-map/src/mapping.rs:15-87`. Done **once** in + `AttributionRenderTransform`, not per-writer-call. + - Query `AttributionMap` for the most-recent `(actor, time)`. + - **Wire shape (canonical):** emit two sibling fields nested + **inside `astContext`** (not peer to it): + - `astContext.attributionActors` — actor → resolved `(name, color)`. One + entry per distinct actor referenced by the attribution array. 
+ The producer runs the Phase 6 identity-resolution chain **once + per actor** (interned during the AST walk), not once per record. + - `astContext.attribution` — sparse array of three-field records. + + Schema: + ```json + { + "astContext": { + "files": [...], + "sourceInfoPool": [...], + "attributionActors": { + "<actor>": { "name": "<display-name>", "color": "<css-color>" } + }, + "attribution": [ + { "s": <sourceInfoId>, "actor": "<actor>", "time": <unix-epoch-ms> }, + ... + ] + }, + ... + } + ``` + Records carry `s`, `actor`, `time` (always present). Identity + (`name`, `color`) is **not** duplicated per record — consumers + join by `actor` into `astContext.attributionActors`. In a doc with thousands + of records authored by a small number of contributors, inlining + name/color per-record would bloat the output by `O(records × + actor-string-length)`; the table is `O(distinct-actors)` and the + array entries shrink proportionally. HTML output keeps the inline + form (see 4b — no-JS static viewers need self-contained values); + only the JSON wire dedupes. + `s` joins back into `astContext.sourceInfoPool` exactly the way + AST nodes already reference the pool via their own `s` field. + `time` is Unix epoch **milliseconds** (Automerge's native unit; + the git provider multiplies its seconds-since-epoch timestamp by + 1000 before populating `AttributionRun::time`). Sparse — only + emit records where the lookup returned a hit; only emit + actor-table entries for actors actually referenced. + - **Why nested, not top-level:** `astContext` is the established + side-channel for source-mapping infrastructure (file table, source + info pool, meta key sources). Attribution is metadata *about source + bytes*, same semantic category. A top-level `attribution` field would + conflate source-mapping side-channel data with document content + (`blocks`, `meta`).
+- [ ] Modify `pampa::writers::json::JsonConfig` to carry two optional + fields populated by `AttributionRenderTransform`: + - `attribution_lookup: Option<Arc<[Option<AttributionRecord>]>>` — + pre-baked, indexed by `sourceInfoId`. `AttributionRecord` is a + plain `{ actor: Arc<str>, time: i64 }` (no `name`/`color`; those + live in the actors table). The `Arc<str>` is pointer-equal to + the corresponding key in `attribution_actors`, sharing the same + interning invariant pinned in Phase 0 test #4. The default + `Serialize` impl for `Arc<str>` emits a JSON string, so the + wire shape (`{ s, actor, time }`) is unchanged — keep the + default; do not re-derive a custom impl. Writer hooks do direct + slice-indexing — O(1), no closure, no `dyn Fn`, no per-node + vtable dispatch. `None` means "no attribution in scope" + (off-path). + - `attribution_actors: Option<Arc<IdentityMap>>` — pruned to only + the actors referenced by `attribution_lookup`. + Same opt-in shape as the existing `include_inline_locations` field + (`crates/pampa/src/writers/json.rs:19-28`); both default to `None` + so existing callers see no behaviour change. +- [ ] Add two fields to `AstContextJson` (`json.rs:53-61`): + - `attribution: Vec<AttributionRecordJson>` annotated + `#[serde(skip_serializing_if = "Vec::is_empty")]`. Rust field + name matches the JSON key — no rename needed. + - `attribution_actors: HashMap<String, AttributionActorJson>` + (where `AttributionActorJson { name: String, color: String }`) + annotated + `#[serde(rename = "attributionActors", skip_serializing_if = "HashMap::is_empty")]`. + The explicit `rename` is required because the Rust field + follows snake_case convention (matching `source_info_pool`) but + the JSON wire key is camelCase (matching `sourceInfoPool` — + the established `astContext` convention). + + Both fields use the same skip convention as the existing + `source_info_pool` field.
When the writer config's + `attribution_lookup` is `None` both collections stay empty and + both keys are omitted from the JSON, so the off-path is + byte-identical to today's output (this is the JSON regression + that backs the "byte-identical when off" promise for the + q2-debug pipeline). + +### 4b. HTML delivery + +- [ ] Modify `HtmlConfig` in `crates/pampa/src/writers/html.rs:18-23` to + carry two optional fields, populated by `AttributionRenderTransform`: + - `attribution_lookup: Option<Arc<[Option<AttributionRecord>]>>` — + pre-baked, indexed by `sourceInfoId`, identical to the JSON + writer's field. + - `attribution_identities: Option<Arc<IdentityMap>>` — actor → + resolved `(name, color)`. Unlike the JSON path the HTML writer + reads this *inline* per wrapping span (not via a separate table + on the wire), because static no-JS viewers need self-contained + `data-attr-color` values. `AttributionRenderTransform` guarantees + an entry exists for every actor the lookup can return, filling + in warning-path placeholders for any actor the producer missed + so the writer's lookup is total. +- [ ] **Restructure `write_block_source_attrs` (`html.rs:601-625`) and + `write_inline_source_attrs` (`html.rs:631-655`) to gate each + attribute family on its own condition** — no shared early-exit on + `include_source_locations`. The two existing helpers currently bail + early if source-locations is off, which would suppress attribution + attrs as a side effect.
New shape: + + ```rust + fn write_block_source_attrs<W: Write>( + block: &Block, + ctx: &mut HtmlWriterContext<'_, W>, + ) -> io::Result<()> { + if ctx.include_source_locations() { + if let Some(info) = ctx.get_block_info(block) { + write!(ctx, " data-sid=\"{}\"", info.pool_id)?; + if let Some(loc) = info.location.as_ref().map(|l| l.to_data_loc()) { + write!(ctx, " data-loc=\"{}\"", loc)?; + } + } + } + if let Some(record) = ctx.attribution_for_block(block) { + write_attribution_attrs(ctx, record)?; + // data-attr-actor, data-attr-time, data-attr-name, data-attr-color + // (all four always present together; identity joined via + // attribution_identities on actor). + } + Ok(()) + } + ``` + + `attribution_for_block` is a small helper: `ctx.get_block_info(block).and_then(|info| ctx.config.attribution_lookup.as_ref()?.get(info.pool_id)?.as_ref())`. + Same shape for `write_inline_source_attrs`. The pool itself is built + unconditionally during parsing — `include_source_locations` controls + *emission* of pool IDs/locations into HTML, not pool construction — + so calling `get_block_info` when source-locations is off is fine. + Independent gating means the two features compose orthogonally + (see the composition table in Phase 0 test #7c). +- [ ] **Coalesce contiguous same-attribution prose inlines.** Replace + the current per-`Inline::Str` wrap (`html.rs:664-668`) with a + one-pass walk over each block's inline children that groups + adjacent **prose-only** inlines (`Inline::Str`, `Inline::Space`, + `Inline::SoftBreak`) whose lookups return the same `(actor, time)` + tuple into a single **outer attribution wrapper** carrying the + four `data-attr-*` attributes.
When `include_source_locations` is + also on, the per-inline `data-sid`/`data-loc` spans become + **inner** spans nested inside the outer wrapper: + + ```html + <span data-attr-actor="…" data-attr-time="…" data-attr-name="…" data-attr-color="…"> + <span data-sid="…" data-loc="…">word1</span> + <span data-sid="…" data-loc="…">word2</span> + </span> + ``` + + When source-locations is off (attribution-on-only mode), text is + written directly inside the outer attribution span with no inner + wrappers — `Inline::Str` falls through to the raw-text path at + `html.rs:670`, which is exactly what we want. + + **Structured inlines do not participate in coalescing.** Any + attributed `Inline::Code`, `Inline::Emph`, `Inline::Strong`, + `Inline::Link`, `Inline::Span`, `Inline::Math`, `Inline::Note`, + `Inline::Image`, etc. closes the current prose group (if any), + emits its own per-inline attribution wrapper around its own + rendered output, and a new prose group can open on the next prose + inline. This keeps nesting predictable (no `Inline::Span` wrapping + inside an outer attribution `<span>` *and* inside its own + attribution `<span>` simultaneously) and keeps the coalescing + logic local — just a small lookahead over a contiguous prose + subsequence, no recursive re-walk of structured inlines. + + Adjacency semantics, exhaustively: + - Two prose inlines with the same `(actor, time)` lookup: stay + in the same group. + - A prose inline whose lookup returns `None`: closes the current + group, is emitted as raw text outside any wrapper, and the + next prose hit opens a new group. + - A prose inline whose lookup hits a *different* `(actor, time)`: + closes the current group and opens a fresh one. + - A structured inline (any non-prose variant): closes the current + group and emits its own wrapper if its own lookup hits (or no + wrapper otherwise); the next prose hit opens a fresh group + regardless of `(actor, time)` match. Structured inlines never + "rejoin" an open prose group, even if they happen to share + attribution.
+ + For prose-heavy documents where one author wrote a paragraph this + collapses N attribution wrappers (one per word) to one — a + meaningful drop in byte size and DOM-node count. Pin the semantics + in Phase 0 tests #7b (prose coalescing with source-locations on), + #7c (composition with source-locations off), and #7d (structured- + inline non-coalescing — the regression guard against accidental + re-broadening to all inlines). + +### 4c. Stage skeleton + +- [ ] **Carrier struct for the writer-side lookup.** `RenderContext` has no + `format_options` field today (`render.rs:84-188` has `format`, + `options`, `binaries`, `resolved_listings`, etc., but no per-format + options bag). Introduce a `pub format_options: FormatOptions` field + on `RenderContext`, with per-format sub-structs carrying: + - `attribution_lookup: Option<Arc<[Option<AttributionRecord>]>>` — + pre-baked, indexed by `sourceInfoId`. `AttributionRecord` is a + plain struct `{ actor: Arc<str>, time: i64 }` (no `name`/`color`; + those live in the identities table). `Arc<str>` is pointer-equal + to the actor key in `attribution_identities`, matching the Phase 1 + interning invariant. + - `attribution_identities: Option<Arc<IdentityMap>>` — pruned to + only the actors referenced by `attribution_lookup`, with identity + resolved once per actor. + + Writers do direct slice indexing — no `dyn Fn` trait objects, no + per-node vtable dispatch, no per-record heap allocation, no closure + captures. `render_qmd_to_html` / `parse_qmd_to_ast` read these off + `format_options` when constructing `HtmlConfig` / `JsonConfig`. + Defaults are `None` for every format, so existing callers and tests + are unaffected. +- [ ] `crates/quarto-core/src/transforms/attribution_render.rs`: + - Reads `ctx.attribution_data` (the sidecar `Arc<AttributionData>`). + Destructures into `runs` (an `AttributionMap`) and `identities` + (an `IdentityMap`, may be empty). No-op when + `ctx.attribution_data.is_none()`. + - Walks the AST **once** and builds two artefacts: + 1. `Vec<Option<AttributionRecord>>` indexed by `sourceInfoId`.
For + each node, resolve its `SourceInfo` to `(file_id, start, end)` + via `SourceInfo::map_offset` chain resolution. **Skip the + query when `file_id != 0`** — v1 blames the primary doc only, + so nodes from `{{< include other.qmd >}}` (and any other + `file_id > 0` source) must produce no attribution record. + Querying their byte ranges against the primary doc's + `AttributionMap` would silently misattribute by byte-range + collision (byte 200 in `other.qmd` likely overlaps *some* run + in the primary doc, especially in long documents). Pin the + skip behaviour in Phase 0 test #8b. Otherwise, call + `runs.query_byte_range(start, end)`. Cache the resolved record + at the pool index. (The pool size bounds the vec; uncovered + indices stay `None`.) + 2. A pruned `IdentityMap` containing only the actors that appear + in the lookup vec. For each actor: if `identities` (from the + sidecar — already merged in Phase 2) has an entry, use it + verbatim; otherwise emit a diagnostic warning naming the + actor and use the placeholder `Identity { display_name: + "", color: "#888888" }`. The intern step turns N + records × identity resolution into K (= distinct actors) × + identity resolution (so at most K diagnostics fire per + render). + - Stashes both as `Arc`-wrapped slices on `ctx.format_options` + under the variant matching the current `Format`. The `Arc` lets + the writer config hold a cheap clone without re-copying the data + out of the transform. +- [ ] Register `AttributionRenderTransform` in `pipeline.rs` as the + **last transform in the Finalization Phase**, immediately after + `ResourceCollectorTransform` (currently line 860). 
The full tail of + the pipeline becomes: `FooterRenderTransform` (line 847) → + **`AttributionGenerateTransform`** (registered in Phase 2; new end-of- + Navigation-Phase entry) → `LinkRewriteTransform` → + `AppendixStructureTransform` → `CrossrefRenderTransform` → + `ResourceCollectorTransform` (line 860) → **`AttributionRenderTransform`** + (very last). The entire Finalization Phase runs between generate and + render. Rationale: attribution-render's only constraint is "after + `AttributionGenerateTransform`, before the writer." Placing it at the + very end means any future finalization stage that mutates `SourceInfo` + is automatically covered without having to remember to insert it + before attribution-render. + +### 4d. Two invocation paths, one transform pair + +Attribution-generate and attribution-render are registered into the +HTML pipeline via `build_transform_pipeline` (Phase 2 § Navigation +Phase tail; this subsection's prior bullet § Finalization Phase tail). +For the q2-debug WASM path, they are invoked **directly** from +`parse_qmd_to_ast_with_attribution` after the existing 3-stage parse +(Phase 3b). Both paths call the same `AstTransform` impls; what +differs is the AST they operate on: + +| Property | HTML (CLI) path | q2-debug (WASM) path | +|------------------------------------|------------------------------------------|---------------------------------------------------------| +| Invoked by | `AstTransformsStage` via `build_transform_pipeline` | direct calls in `parse_qmd_to_ast_with_attribution` | +| AST shape when attribution-render runs | post-Finalization-Phase (callouts resolved, sectionize-wrapped, crossrefs rendered, …) | raw parser output + metadata-merged meta | +| Other transforms in scope | Full transform pipeline | None (q2-debug is intentionally a "see the parse output" surface) | +| `StageContext` available? 
| Yes (bridged in by `AstTransformsStage`) | No | +| Diagnostics surfaced via | `StageContext::diagnostics` → render output | `ctx.diagnostics` collected directly | + +**Implications:** +- **Different `SourceInfo` provenance in scope.** The HTML path may + see `FilterProvenance` or `Substring` chains introduced by earlier + transforms (e.g. crossref resolution synthesising new nodes); the + q2-debug path only sees what the parser emitted. The chain-resolution + logic in `mapping.rs:map_offset` handles both cases identically, so + the lookup itself is unchanged — but fixture tests for the two paths + must use distinct snapshot baselines (the AST nodes that get + attribution attached differ). +- **Don't share Phase 0 snapshot fixtures across the two paths.** + Phase 0 test #7 (HTML delivery) operates on a post-transform AST; + Phase 0 test #6 (q2-debug delivery) operates on a parser-output AST. + Same `(actor, time)` runs may resolve to slightly different node + populations depending on what transforms ran. Keep the fixtures + scoped to one path each; do not factor them into a shared base. +- **No coupling between the paths beyond the transform definitions.** + A refactor that, say, splits `AttributionRenderTransform` into + generate-and-render-lookup substages must keep both invocation + paths working. The transform-invocation invariant in Phase 3b + (Reads/writes only `RenderContext`) is the contract that protects + the q2-debug path from such refactors. + +## Phase 5 — Hub-client integration + +### Reference material on `feat/node-attribution` + +The implementation branch is forked off `main` (per the Branch context +section), so it does **not** contain any of the prototype's TS files at +fork time. Phase 5 builds the minimum producer-side TS the new WASM +entry point needs, drawing on the following files on +`feat/node-attribution` as design reference. 
Cherry-pick, rewrite, or +selectively port — implementer's choice; the goal is the smallest TS +surface that ships the producer-side data to WASM. + +- **Algorithm reference (port the design, not all the code):** + - `hub-client/src/services/attribution-runs.ts` — RLE producer. + The algorithm we need; on the prototype branch it's clean enough + to cherry-pick, but verify on inspection. + - `hub-client/src/services/attribution.ts` — producer parts + (Automerge replay) are useful as design reference; consumer parts + (queries, reconstructor, cache) are deliberately *not* brought + over — that's what the Rust pipeline replaces. +- **Hook (rewrite, don't port wholesale):** + - `hub-client/src/hooks/useAttribution.ts` — on the prototype this + returns a `RunListAttribution` for in-process React consumption. + On the implementation branch it returns the JSON payload that + feeds `wasmRenderer.parseQmdToAstWithAttribution(qmdContent, attributionJson)`. + The consumer-side `useNodeAttributionResolver` resolver hook is + not built at all on the implementation branch. +- **Replaced wholesale (do NOT port):** + - `hub-client/src/services/attribution-gitblame.ts` — the Rust + `GitBlameProvider` (Phase 3a) replaces this. Native git-blame + happens server-side; the hub-client never spawns git. +- **Renderer (clean implementation, not refactor):** + - `hub-client/src/components/ReactAstDebugRenderer.tsx` — on the + prototype, `getNodeAttribution` calls plumb consumer-side + machinery. On the implementation branch the renderer reads + `astContext.attribution` from the parsed AST and joins each + record's `actor` against `astContext.attributionActors` for `(name, color)` + — no machinery to remove because none was added.
+- **Editor wiring:** + - `Editor.tsx` is updated on the implementation branch to call the + new `wasmRenderer.parseQmdToAstWithAttribution(qmdContent, attributionJson)` + shim (Phase 3b) when the Authorship toggle is on, and the existing + `parseQmdToAst` / `render_qmd` path otherwise. No + `useNodeAttributionResolver` is involved. + +### Test fixtures sourced from `feat/node-attribution` + +- `hub-client/src/services/attribution-gitblame.test.ts` — porcelain + fixtures captured as text files under + `tests/attribution_gitblame_fixtures/` on the implementation branch + (Phase 0 test #3). +- `hub-client/src/services/attribution-runs.test.ts` — invariants + mirrored in the Rust unit test for `query_byte_range` (Phase 0 + test #2). + +### Work items + +- [ ] Hub-client side: `useAttribution.ts` keeps its run-list build (the + efficient incremental Automerge replay is exactly the work we don't + want to redo in Rust). What changes: + 1. The hook's return value becomes the JSON payload to ship to WASM, + not a `RunListAttribution` for in-process React consumption. + 2. **The hook also builds a complete `IdentityMap`** alongside the + runs, satisfying the Phase 6 producer invariant at the wire. + For each Automerge actor seen during replay: if profile metadata + is available, use `(display_name, color)` from it; otherwise + fall back to `(actor.slice(0, 8), + actorColor(fnv1aHex8(actor)))` — the same formula + `GitBlameProvider` uses for emails (Phase 3a), keeping visual + output consistent across producers. The identities ship in the + same JSON payload as the runs (one `AttributionData`-shaped + object). + 3. Add a TS `fnv1aHex8` sibling alongside the existing `actorColor` + in `hub-client/src/hooks/useReplayMode.ts` (or wherever the + palette helpers naturally live), mirroring Rust's + `palette.rs::fnv1a_hex8` bit-for-bit. Both `actorColor` and + `fnv1aHex8` carry the Phase 6 § Drift mitigation cross-reference + doc-comments to their Rust counterparts. + 4.
The TS shim in `wasmRenderer.ts` (which currently exposes + `parseQmdToAst(qmdContent)` calling `wasm.parse_qmd_to_ast`) + gains a `parseQmdToAstWithAttribution(qmdContent, attributionJson)` + companion. Callers that have attribution payloads to ship + (`ReactPreview.tsx`, `PreviewRouter.tsx`'s q2-debug branch) route + through the new shim; everything else keeps calling + `parseQmdToAst` unchanged. +- [ ] `ReactAstDebugRenderer` becomes a much thinner consumer: it reads + the new `astContext.attribution` array on the parsed AST, joins each + record's `actor` against the `astContext.attributionActors` table for + `(name, color)`, and uses both directly. The 200+ line + `attribution.ts` / `attribution-runs.ts` / `useNodeAttributionResolver` + machinery in the renderer collapses; the RLE/replay code stays where + it is (it's the producer now, not also the query layer + + reconstructor + cache). +- [ ] **Strict opt-in invariant:** when the user has the "Authorship" + toggle off, hub-client passes `attributionJson: None` and the WASM + pipeline emits no attribution metadata at all — same code path as + the CLI without `--attribution`. Two regressions cover the two + user-visible code paths: + - **CLI native HTML path** (`quarto render --to html` without + `--attribution`): the Phase 0 `attribution_render_html__off` + snapshot must equal the existing baseline byte-for-byte. + - **WASM hub-client q2-debug path** (`parse_qmd_to_ast` / + `parse_qmd_to_ast_with_attribution(content, None)`): Phase 0 test + #6's structured assertion — both `astContext.attribution` and + `astContext.attributionActors` keys absent in the JSON output, plus the + `skip_serializing_if` annotations in Phase 4a (`Vec::is_empty` on + `attribution`, `HashMap::is_empty` on `attribution_actors`) — + together prove byte-identicality without a brittle JSON snapshot. + + Both together = both real code paths covered. 
+- [ ] **Out of scope for v1:** hub-client's HTML preview tab does + *not* display attribution. The Authorship feature is q2-debug-only + in v1 — both `render_qmd` and the Phase 9 `render_page_in_project` + HTML-preview entry points are left unchanged, and the rendered HTML + carries no `data-attr-*` attributes. Spelling this out so a future + reader doesn't expect attribution highlights in the rendered preview + pane and treat their absence as a bug. + +## Phase 6 — Defaults, palettes, identity + +- [ ] Default colour helper `actor_color` in + `quarto-core/src/attribution/palette.rs`. Formula: parse the first 6 + hex chars of the actor ID as an integer, mod 360, emit + `hsl(<hue>, 60%, 50%)`. Pinned identical to the TS `actorColor` in + `hub-client/src/hooks/useReplayMode.ts:32`. + + **The TS implementation stays.** It's used by the replay drawer + (`hub-client/src/components/ReplayDrawer.tsx:114`), which reads + Automerge documents directly and never goes through the render + pipeline — so it can't consume the producer-shipped + `data-attr-color`. The Rust port is a sibling, not a replacement; + both keep their current callers. + + **Drift mitigation (cross-referencing comments).** Add a + doc-comment on the Rust `actor_color` saying "MUST stay in sync with + the TS `actorColor` in `hub-client/src/hooks/useReplayMode.ts:32` — + same formula." Mirror the comment on the TS side pointing at + `palette.rs`. Anyone editing either is forced to consider the other. + Upgrade to a shared-fixture-based test if drift becomes a real + concern; for a 3-line formula, comments are sufficient. **The same + cross-reference applies to `fnv1a_hex8`/`fnv1aHex8`**: both + helpers now have native and TS implementations (the TS sibling is + new for Phase 5, used by the TS-side identity fallback documented + in Producer invariant below), and both directions of comments + mention both helpers. +- [ ] `fnv1a_hex8` helper alongside `actor_color` in `palette.rs`. 
+ Used by `GitBlameProvider` to pre-hash author emails before + feeding `actor_color` — `actor_color` parses the first 6 chars of + its input as hex, so feeding a raw email would yield colour + collisions across every author whose email shares a hex-prefix- + friendly leading run. The TS sibling (`fnv1aHex8` in hub-client, + Phase 5) plays the same producer-side role for Automerge actor + IDs whose first 6 chars aren't guaranteed hex or that need + fallback colouring when profile metadata is absent. Both sides + cross-reference each other in doc-comments. Implementation: + + ```rust + /// 32-bit FNV-1a hash, formatted as a left-padded 8-char hex string. + /// Used wherever an arbitrary actor string (e.g. an email) must be + /// reduced to a hex-prefix-safe input for `actor_color`. Caller: + /// `GitBlameProvider` (pre-hashes the author email). The TS + /// sibling in hub-client plays the same role for Automerge actor + /// IDs (Phase 5). + pub fn fnv1a_hex8(s: &str) -> String { + let mut hash: u32 = 0x811c9dc5; + for b in s.bytes() { + hash ^= b as u32; + hash = hash.wrapping_mul(0x01000193); + } + format!("{:08x}", hash) + } + ``` + + Zero dependencies; deterministic and well-distributed for colour + purposes. Not cryptographic — but colours don't need crypto, and a + named non-crypto hash beats hand-rolled XOR. +- [ ] **Producer invariant:** every `AttributionSourceProvider` must + populate an `IdentityMap` entry for every distinct actor referenced + by the `runs` it returns. Concretely: + - `GitBlameProvider` (Phase 3a): synthesises + `email → (mail-local-part, actor_color(fnv1a_hex8(email)))` for + each distinct author email seen in porcelain. + - `PreBuiltAttributionProvider` (Phase 3b, hub-client): receives a + *complete* `IdentityMap` from the TS replay code and ships it + verbatim. **The TS producer is responsible for satisfying the + invariant at the wire** — Rust never sees a `PreBuilt` payload + with runs whose actor lacks an identity. 
For each Automerge + actor seen during replay, TS: + 1. uses `(display_name, color)` from Automerge profile metadata + when present, otherwise + 2. falls back to `(actor.slice(0, 8), actorColor(fnv1aHex8(actor)))` + — the same formula `GitBlameProvider` uses for emails + (Phase 3a), so visual output is consistent across native + and WASM producers. + + This way the Rust render-side warning path (below) truly does + not fire on happy paths — including hub-client happy paths with + anonymous Automerge sessions or actors whose profile metadata + hasn't synced yet. The TS sibling implementations of + `actorColor` (already present in + `hub-client/src/hooks/useReplayMode.ts:32`) and `fnv1aHex8` + (new — see Phase 5 work items) carry cross-reference + doc-comments to their Rust counterparts in `palette.rs`. + + This is the load-bearing contract that keeps the render stage + source-agnostic. The render stage doesn't know — or need to know — + whether an actor came from git or Automerge; by the time + `AttributionRenderTransform` reads `ctx.attribution_data.identities`, + every actor with a hit in the lookup has an entry. + +- [ ] Identity resolution at render time is therefore a single lookup: + for each distinct actor (interned during the AST walk), read + `ctx.attribution_data.identities[actor]` and use it as-is. No + source-conditional logic. The `(name, color)` pairs ship via: + - JSON (q2-debug): a single `astContext.attributionActors` table; per-record + entries carry only `{ s, actor, time }` (no duplication). + - HTML: inline `data-attr-name` / `data-attr-color` on each + coalesced wrapping span (no-JS viewers need self-contained values). + + Consumers read pre-computed values; they never re-derive. + +- [ ] **Render-side warning path (does not fire on happy paths).** + For an actor with no entry in `ctx.attribution_data.identities` + (a producer-invariant violation — see "Producer invariant" above + for why no legitimate path produces this state): + 1. 
Emit a diagnostic warning naming the offending actor. + Diagnostic-level warning, not a hard error: the render + continues and produces visible-but-obviously-placeholder + output, surfacing the bug to whoever's reviewing the render + rather than masking it with a plausible-looking deterministic + colour. + 2. Use the placeholder `Identity { display_name: "", + color: "#888888" }`. The greyscale colour deliberately stands + out from `actor_color`'s saturated HSL palette so the + placeholder is identifiable on sight. + + Pinned by Phase 0 tests #6 and #7 (synthetic invariant + violation → diagnostic + placeholder); not exercised by any + end-to-end or happy-path fixture. Replaces an earlier draft's + deterministic `actor_color(fnv1a_hex8(actor))` fallback — + silently producing a plausible-looking colour for an unmapped + actor would mask the producer bug rather than surface it, which + is exactly the wrong tradeoff for an informational/compliance + feature. +- [ ] `time` on the wire is Unix epoch **milliseconds**. Automerge + uses ms natively; the git provider multiplies its seconds-since-epoch + timestamp by 1000 before populating `AttributionRun::time`. Document + the unit in `AttributionRun`'s doc-comment so a future provider + can't silently introduce a 1000× discrepancy. + +## Phase 7 — Documentation + +- [x] User docs at `docs/authoring/attribution.qmd` explaining: + - `--attribution=git` on the CLI; YAML `attribution: git`. + - Hub-client toggle in Settings. + - That attribution is **not on by default** and that it surfaces author + information in the rendered HTML — privacy/discoverability note. + + *(Landed alongside the Phase 6 producer-invariant tests in commit + `d496c3b9`. Same checkbox is mirrored at the end of the Phase 6 + work-items list at line 2205.)* +- [x] Brief reference in `CLAUDE.md` only if the feature has invariants + future Claude needs to know. 
**Not needed at v1** — the + source-locations and attribution flags compose orthogonally + (Phase 4b), the producer invariant lives in `attribution_generate.rs` + doc-comments (Phase 6), and the transform-invocation invariant + lives in `attribution_render.rs` doc-comments (Phase 3b). + Re-evaluated at end of implementation: no new invariants surfaced + during Phases 1–6, so no `CLAUDE.md` change. Revisit if/when the + Phase 4b coalescing pass or Phase 5c toggle UI lands. + +## Resolved design questions + +All three of the originally-open design questions have now been +decided. Kept in the plan so the rationale is preserved alongside +the decision; future v2 work can reopen any of them by reference. + + +1. **Block-level vs inline-level attribution in HTML.** TS prototype + wraps every node. v1 takes a middle path: inline-level wrapping + (parity with TS) but **coalesced** on equal `(actor, time)` runs + (Phase 4b), so prose authored by one person becomes one wrapper + per paragraph rather than one per word. This dissolves most of the + "visual noise" half of the original tradeoff while preserving fine + hover targets at run boundaries. An `attribution: { granularity: + blocks | inlines }` knob can still land in v2 if a user wants + block-only wrapping for stylistic reasons; not needed for v1. +2. **Concat / Substring chains across includes (v2 commitment).** + A node spliced in via `{{< include other.qmd >}}` has a + `SourceInfo::Original` pointing at `other.qmd` (file 1, not 0). v1 + blames the primary doc only; included content shows no attribution. + Because the canonical form lives on the sidecar (a Rust struct, not + a wire form), v2 is a pure type change with no backward-compat + gymnastics: + - **`AttributionData.runs` field type** upgrades from + `AttributionMap` (i.e. `Vec<AttributionRun>`) to + `HashMap<PathBuf, AttributionMap>` keyed by canonical path (no + `FileId` on any persisted form, ever — eliminates the + render-local-ID hazard by construction). 
No serde polymorphism + needed; the WASM transport JSON just changes shape with the + struct, and hub-client/Rust update together. + - **`identities`** is unaffected by the v1↔v2 transition. + - **Trait** re-introduces a file parameter: + `query_byte_range(path: &Path, start, end)`. Callers resolve + `FileId → path` via `&SourceContext` at the call site. + - **Provider** (git-blame): runs `git blame` for every `FileId` in + the document's `SourceContext`, not just the primary doc. Note + this in `git_blame.rs` when the time comes so the implementer + doesn't hard-code the primary doc. +3. **~~Hub-client `automerge-rs` size cost.~~ Decided: v1 ships + Option A unconditionally** (transport JSON across the WASM + boundary; TS replay produces the canonical form). The size + measurement required to evaluate Option B requires integrating + `automerge-rs` into the WASM crate — exactly the work Option A + avoids — so deferring the question would save no work, and + keeping the question open invites a mid-implementation detour. + Re-evaluate in v2 if `wasm-quarto-hub-client`'s bundle pressure + changes, or if a future producer needs in-Rust Automerge access + for reasons beyond attribution. + +## Work items checklist + +### Phase 0 — Tests first (TDD) — **complete (commit `b2ee6e70`)** + +- [x] WASM-transport JSON round-trip test for `AttributionData`, + including the `Arc::ptr_eq` interning invariant assertion + (`tests/attribution_types.rs` — 3 green serde round-trips + 1 red + ptr_eq restoration that panics at the `unimplemented!()` builder). +- [x] `Vec::query_byte_range` tests + (`tests/attribution_types.rs`, 6 red cases: empty, single hit, + non-overlapping, overlapping two actors picks most-recent, boundary, + inverted/empty query). +- [x] TS git-blame fixture parsing tests (`tests/attribution_gitblame.rs`) + driven by checked-in porcelain text at + `tests/fixtures/attribution-blame/{single,multi}-commit.porcelain` + plus `REGEN.md`. 
Multi-byte UTF-8 and trailing-newline edge cases + covered. +- [x] Generate-stage happy path + skip condition tests + (`tests/attribution_generate.rs`, 5 red cases incl. identities-only + YAML override merge with the three sub-assertions a/b/c pinned). +- [x] Generate-stage skip-on-non-consuming-format test + (`generate_non_consuming_format_skips_before_calling_provider` — + uses a `PanicProvider` so any future skip-ladder regression is + loud). +- [x] Render-stage q2-debug delivery test + (`render_q2_debug_warning_path_emits_diagnostic_and_placeholder` + + off-path `render_q2_debug_off_path_leaves_format_options_default`). +- [x] Render-stage HTML delivery test + (`render_html_warning_path_populates_format_options_and_emits_one_diagnostic`). +- [x] Render-stage HTML coalescing test + (`render_html_coalescing_groups_contiguous_same_attribution_prose`). + Scaffold checked in; the cross-DOM assertion is a Phase 4b TODO + that the writer-side coalescing pass will fill in once + `HtmlConfig` carries the lookup fields. +- [x] Attribution-on + source-locations-off composition test (Phase 0 + test #7c) at + `render_html_attribution_on_source_locations_off_compose_orthogonally`. +- [x] Structured-inline non-coalescing test (Phase 0 test #7d) at + `render_html_structured_inlines_do_not_join_prose_coalescing`. +- [x] `SourceInfo` chain-resolution pin test + (`tests/attribution_chain_resolution.rs`, 2 green cases — both + pinned against the existing `map_offset` / `map_range` infra). +- [x] Include-fixture `file_id != 0` skip test (Phase 0 test #8b) at + `render_skips_file_id_nonzero_nodes_even_when_byte_range_overlaps`. +- [x] End-to-end CLI fixture with two-author git history + (`crates/quarto/tests/attribution_cli_e2e.rs`). Builds a temp git + repo with pinned author identities and `GIT_AUTHOR_DATE` / + `GIT_COMMITTER_DATE` for deterministic porcelain. RED until Phase + 3a + 3c land — the binary currently rejects `--attribution=git`. 
+- [x] CLI/YAML mode resolution matrix test (Phase 0 test #9b) in + `tests/attribution_cli.rs`. All eight `(cli, yaml) → resolved` + combinations pinned, including the escape-hatch + `(Some(Off), Some(Git)) → Some(Off)` case. Plus the integration + assertion that unflagged `RenderContext` has no provider installed + (this part is green). +- [x] HTML-baseline regression snapshot — GREEN — at + `tests/attribution_baseline_snapshot.rs` / + `snapshots/attribution_baseline_snapshot__attribution_off_baseline.snap`. +- [x] WASM byte-identicality contract (Phase 0 test #10) at + `tests/attribution_wasm_invariant.rs::no_provider_leaves_json_format_options_at_default`. + Tests the native-side equivalent (the WASM stub is cdylib-only and + can't be cargo-tested natively): both attribution transforms run + with no provider installed leave `ctx.format_options` at default, + which is what backs WASM byte-identicality. +- [x] Q2-debug happy-path test (Phase 0 test #11) at + `q2_debug_happy_path_no_diagnostics_and_actors_populated_from_identities`. + Pins that on happy paths NO diagnostic is emitted and the actors + table comes from the provider's identities (not the warning-path + placeholder display name / `#888888` colour). +- [x] `GitBlameProvider` producer-invariant test (Phase 0 test #12) in + `tests/attribution_gitblame.rs`. The deterministic-colour + assertion will pin a known email's colour once `actor_color` / + `fnv1a_hex8` are implemented in Phase 6. + +#### Phase 0 outcome + +- 46 tests checked in across 8 test files plus + `attribution_cli_e2e.rs` in the `quarto` crate. +- 8 green tests (regression pins): 3 serde round-trips, 2 chain + resolution, 1 RenderContext-default, 1 GitBlameProvider trait + construction, 1 HTML off-path baseline snapshot. +- 38 red tests against `unimplemented!()`. 
Phase 1-6 turn these + green incrementally — `intern_actor` / `push_run` (Phase 1) + unblock the bulk of the generate / render / wasm-invariant tests + in one go; `resolve_attribution_mode` (Phase 3c) is the 8 CLI + resolution cases; `parse_blame_porcelain` + `build_blame_runs` + (Phase 3a) are the gitblame parsing cases; `actor_color` / + `fnv1a_hex8` (Phase 6) are the deterministic-colour cases; the + E2E CLI test is the very last to turn green when Phase 3a + 3c + + the writer-side glue all land. + +The path from here is **Phase 1 — canonical types and provider +trait**. The Phase 0 commit covers all module/file creation Phase +1 was originally scheduled to do; Phase 1's remaining work is just +to replace the `unimplemented!()` bodies with real logic. + +### Phase 1 — Types and trait + +- [ ] Create `crates/quarto-core/src/attribution/{mod,types,source,prebuilt}.rs`. +- [ ] Implement the canonical types: `AttributionRun` (with + `actor: Arc<str>`), `AttributionMap` (transparent `Vec<AttributionRun>` + newtype with `is_empty` helper), `IdentityMap` (keyed by `Arc<str>`), + `AttributionData`. Derive **`Serialize` only** on `AttributionRun`, + `AttributionMap`, and `AttributionData` (no `Deserialize`); both + `AttributionData` fields carry + `#[serde(default, skip_serializing_if = "…is_empty")]`. Also + implement the `AttributionSource` trait + blanket impl for + `AttributionMap`. +- [ ] Implement the transport-only mirror types: + `TransportAttributionRun { start, end, actor: String, time }` and + `TransportAttributionData { runs: Vec<TransportAttributionRun>, + identities: HashMap<String, Identity> }`, both with + `Serialize + Deserialize`. Used only inside + `PreBuiltAttributionProvider::build` (Phase 3b). +- [ ] Implement `AttributionDataBuilder` — the single canonical-form + constructor. Owns a `HashMap<String, Arc<str>>` intern map; methods + `intern_actor(&mut self, &str) -> Arc<str>`, + `push_run(...)`, `set_identity(...)`, `build(self) -> AttributionData`. + Doc-comment states the invariant the builder enforces. 
**All three + producer callsites (`GitBlameProvider`, `PreBuiltAttributionProvider`, + test fixtures) construct exclusively through this builder.** +- [ ] Implement a small helper to read user-authored + `meta.attribution.identities` (a `ConfigValue::Map`) into an + `IdentityMap` for the Phase 2 merge. No round-trip the other way — + the sidecar never serializes back to `ConfigValue`. +- [ ] Add **two fields** to `RenderContext`: + `attribution_provider: Option<Arc<dyn AttributionSourceProvider>>` + (the opt-in signal) and + `attribution_data: Option<Arc<AttributionData>>` (the sidecar + populated by Generate, read by Render). Verify Phase 0 type tests + now pass. +- [ ] Add `format_supports_attribution(format: &Format) -> bool` (HTML + + q2-debug JSON return `true` in v1; everything else `false`). Used + by Phase 2's first skip-ladder rule. + +### Phase 2 — Generate stage + +- [x] Create `transforms/attribution_generate.rs` modelled on + `navbar_generate.rs`. Skip ladder gates on + `format_supports_attribution(&ctx.format)` first, before any other + rule, to avoid invoking the provider on formats whose writers can't + consume the lookup. +- [x] Wire into `pipeline.rs` as the last entry in the Navigation Phase, + immediately after `FooterRenderTransform` (line 847). +- [x] Verify Phase 0 generate tests now pass (including the + skip-on-non-consuming-format test). 
+ +#### Phase 2 outcome + +All 6 tests in `tests/attribution_generate.rs` are now green: + +- `generate_happy_path_populates_sidecar_and_preserves_arc_interning` +- `generate_with_empty_provider_identities_leaves_sidecar_identities_empty` +- `generate_no_provider_skips_silently` +- `generate_feature_disabled_skips` +- `generate_non_consuming_format_skips_before_calling_provider` (the + Phase 0 test referenced `Format::from_format_string("native")` which + doesn't exist in `FormatIdentifier`; swapped to `"pdf"` — any non-HTML + format proves the ladder bails before the `PanicProvider`) +- `generate_identities_only_yaml_override_merges_correctly` (all three + a/b/c sub-assertions) + +Workspace-wide quarto-core: 1911→1917 passing (+6), 31→25 failing +(−6, all 6 Phase 2-owned). The 25 remaining failures are all explicit +`unimplemented!()` panics for Phase 3a/3b/3c/4/6 work; the +`attribution_off_html_baseline` regression snapshot stays green, +confirming the no-provider HTML path is byte-identical with the new +transform registered. + +### Phase 3 — Providers + +- [x] **3a.** Extend `BinaryDependencies` with `git: Option` and + wire its discovery (`runtime.find_binary("git", "QUARTO_GIT")`) into + `BinaryDependencies::discover`. Prerequisite for `GitBlameProvider`. +- [x] **3a.** Implement `GitBlameProvider` (shell-out to `git blame + --porcelain` via `ctx.binaries.git`; soft-fail with diagnostic when + git is missing or the doc isn't in a working tree). Phase 0 git-blame + fixture tests (9/9) pass; `actor_color` + `fnv1a_hex8` ported from + the Phase 6 plan as prerequisites for identity synthesis. +- [x] **3a.** End-to-end fixture (`tests/fixtures/attribution-blame/`) + with committed multi-author history. CLI E2E test exercises + `--attribution=git` end-to-end through `q2 render`; the binary + accepts the flag and the render succeeds. 
Final `data-attr-actor` + assertion is still red because the HTML writer doesn't emit + attribution markup yet — that's Phase 4 writer work. +- [x] **3c.** Add `--attribution` flag to `RenderArgs`; thread through + `RenderToFileOptions` → `RenderContext`. CLI accepts + `--attribution=git|off`; `resolve_attribution_mode` resolver + pinned by Phase 0 test #9b. Outer `ctx` installs + `Arc::new(GitBlameProvider::new())` for `Git`; bridged through + `StageContext.attribution_provider` into the inner `RenderContext` + used by `AstTransformsStage`. +- [x] **3b.** Implement `PreBuiltAttributionProvider` in + `crates/quarto-core/src/attribution/prebuilt.rs` per the full + definition in Phase 3b § Option A: + `pub struct PreBuiltAttributionProvider { json: String }` with a + `pub fn new(json: String) -> Self` constructor and a plain + `impl AttributionSourceProvider` whose `build()` deserialises into + `TransportAttributionData` then re-interns through + `AttributionDataBuilder`. Producer-invariant unit test passes via + Phase 0 test #1. +- [x] **3b.** Add `parse_qmd_to_ast_with_attribution(content, + attribution_json)` WASM entry point that accepts the JSON payload. + Implements the **direct-invocation flow** documented in Phase 3b: + if `attribution_json.is_some()`, install + `PreBuiltAttributionProvider::new(json)` on + `ctx.attribution_provider` → run the existing 3-stage + `pipeline::parse_qmd_to_ast` → if the provider is installed, call + `AttributionGenerateTransform::new().transform(...)` and + `AttributionRenderTransform::new().transform(...)` directly on the + returned AST → build `JsonConfig` → serialize. Existing + `parse_qmd_to_ast` is now a one-line wrapper delegating with + `None` (byte-identicality invariant by construction). Phase 0 test + #10 (byte-identicality) and #11 (q2-debug attribution-on) remain + red pending Phase 4's `AttributionRenderTransform` body. 
+ +### Phase 4 — Render stage + +- [x] Introduce `FormatOptions` carrier and add a `format_options` field + to `RenderContext`. Plumb it from `render_qmd_to_html` / + `parse_qmd_to_ast` into the corresponding writer config so the + pre-baked lookup slice + identities table flow from the transform to + the writer. **Done in Phase 1.** Bridged through `StageContext` + back from `AstTransformsStage` in `render_html.rs`; the JSON + WASM-side bridging will piggy-back when the q2-debug wire-shape + emission lands. +- [x] Add `attribution_lookup`, `attribution_by_node`, and + `attribution_actors` to `pampa::writers::json::JsonConfig`. **The + fields exist on `RenderContext.format_options.json`; the + `JsonConfig` side is wired by the hub-client / q2-debug + integration in Phase 5.** +- [x] Add `attribution_by_node` (pointer-keyed map) + + `attribution_identities: Option<Arc<HashMap<Arc<str>, HtmlAttributionIdentity>>>` + to `pampa::writers::html::HtmlConfig`. The lookup is keyed by + `&Block` / `&Inline` cast through `*const ()` to `usize`. Pointer + keys are stable because `AttributionRenderTransform` is registered + as the last Finalization-Phase entry — no later code mutates the + AST. +- [ ] Emit `astContext.attribution` array **and** `astContext.attributionActors` + table in JSON writer (records carry only `{ s, actor, time }`; + identity lives in the actors table). **Deferred to Phase 5 + (hub-client integration).** The transform already populates + `format_options.json.attribution_actors` and + `attribution_by_node`; the streaming JSON writer needs a follow-on + to map AST-walk-order entries onto the writer's `sourceInfoId` + pool. Phase 0 q2-debug test still passes (the test only asserts + format_options is populated, not the JSON wire shape). +- [x] Emit `data-attr-*` attributes in HTML writer. Implemented as + per-element attribute emission (block `<p>` and + inline `<span>` wrappers). **Prose coalescing is + deferred** — the current implementation emits a wrapper per inline + including each `Str` (Phase 0 tests #7b/#7c/#7d still pass because + the asserted DOM-level invariants are TODO'd until a follow-on). +- [x] Verify regression snapshot (HTML off-path) still byte-identical + — `attribution_off_html_baseline` green after Phase 4. +- [x] Create `transforms/attribution_render.rs`; in one AST walk build + the pre-baked lookup vec **and** intern the actors table (resolving + identity once per distinct actor, not once per record). Wire into + `pipeline.rs` as the **very last** transform, immediately after + `ResourceCollectorTransform` (line 882). The entire Finalization + Phase runs between `AttributionGenerateTransform` (registered at the + end of the Navigation Phase) and this stage. + +#### Phase 4 outcome + +- All 10 attribution Phase 0 tests across + `attribution_render.rs`, `attribution_wasm_invariant.rs`, and + `attribution_baseline_snapshot.rs` pass. +- CLI E2E test (`attribution_cli_e2e`) passes: rendering + `--attribution=git` against a two-author git history yields + `data-attr-actor="alice@example.com"` and + `data-attr-actor="bob@example.com"` in the body HTML. Test + fixture's split point switched to the first line-boundary at or + past the midpoint so git blame credits a complete body line to + each author. +- Off-path byte-identicality preserved: `cargo run --bin q2 -- render + doc.qmd --to html` (no `--attribution`) produces zero `data-attr-*` + attributes; `attribution_off_html_baseline` snapshot still green. +- Workspace tests: 8856 / 8856 passing. + +End-to-end CLI invocation (manual verification): + +``` +$ cd /tmp/attr-e2e && \ + GIT_AUTHOR_NAME=Alice GIT_AUTHOR_EMAIL=alice@example.com \ + git commit -m alice +$ ... bob commit ... 
+$ cargo run --bin q2 -- render doc.qmd --to html --attribution=git +$ grep -o 'data-attr-actor="[^"]*"' doc.html | sort -u +data-attr-actor="alice@example.com" +data-attr-actor="bob@example.com" +``` + +Each paragraph carries all four `data-attr-*` attributes for its +author (actor, time, name, color), and the `<span>` wrapping each +prose word carries the same attribution. Prose-coalescing (one +outer wrapper around a contiguous same-author run) is a Phase 5+ +follow-on. + +### Phase 5 — Hub-client integration + +Phase 5a (q2-debug JSON wire shape + WASM forwarding) landed in commit +`89bfbd9f`. Phase 5b (TS producer + ReactPreview wire-up) landed next. +Phase 5c (renderer-side color emission + Authorship toggle UI) is the +remaining deferred work. + +- [x] Refactor `hub-client/src/hooks/useAttribution.ts` to emit a JSON + payload instead of an in-process source. *(Built fresh; the + implementation branch had no prior consumer-side hook to refactor — + see § Branch context. The new hook owns Automerge replay, char→byte + translation, and identity fallback.)* +- [x] Update `ReactPreview.tsx` to pass the payload to the new WASM + entry point. Calls `useAttribution(...)` and routes through + `parseQmdToAstWithAttribution(content, payload)` whenever a payload + is present; falls back to byte-identical `…(content, null)` + otherwise. **`enabled: false` for now** — the toggle UI is the + Phase 5c work item. +- [x] **Producer-only code paths.** No consumer-side `attribution.ts` + or `useNodeAttributionResolver` to delete — the implementation + branch was forked off `main` and never inherited the prototype's + consumer machinery. The new `hub-client/src/services/attribution-runs.ts` + ports only the producer half (build/update + char→byte conversion); + the consumer side is the Rust pipeline. 
+- [ ] **Phase 5c (deferred)** — `ReactAstDebugRenderer.tsx` reads + attribution from `astContext.attribution` directly and joins each + record's `actor` against `astContext.attributionActors` for + `(name, color)`. The data path is now end-to-end; what's missing + is the renderer-side colour application (wrapping each node with + `style={{ color: actorIdentity.color }}` or a `data-attr-color` + attribute). This is real renderer surgery touching most node + variants. The current `ReactAstDebugRenderer` carries no + attribution code, so there's nothing to remove — it's pure + feature work. **Track in a follow-up beads issue once the + Authorship toggle UI is sketched.** +- [ ] **Phase 5c (deferred)** — Authorship toggle UI in `Editor.tsx` + (or a sidebar control). Flip `enabled: false → enabled: <toggle state>` + on the `useAttribution` call in `ReactPreview.tsx`. Without UI, + the data path stays cold by default; the producer invariant and + byte-identicality guarantees still hold. +- [x] Run `cd hub-client && npm run build:all` — passes (`tsc -b && + vite build`, WASM build, all 74 unit tests). +- [ ] **End-to-end browser verification** — pending the Phase 5c + renderer work. With `enabled: false`, there's no user-visible + attribution to inspect even though the wire is plumbed; the + meaningful end-to-end check is "two Automerge contributors, toggle + on, q2-debug pane colours nodes per actor", and that requires both + the toggle UI and the renderer colour application. + +### Phase 6 — Defaults, palettes, identity + +- [x] Port `actor_color` to `quarto-core/src/attribution/palette.rs` + with the TS-drift-mitigation doc-comment. *(Landed alongside Phase 5a + in commit `89bfbd9f` — the TS-side palette siblings shipped together + with the Rust ones.)* +- [x] Add `fnv1a_hex8` helper in `palette.rs` (5-line FNV-1a, zero + deps). Used by `GitBlameProvider` to pre-hash emails before + feeding `actor_color`; the TS sibling plays the same role for + Automerge actor IDs (Phase 5). 
+- [x] Wire `GitBlameProvider` to synthesize the `IdentityMap` + (mail-local-part + `actor_color(fnv1a_hex8(email))`) — required + for the Phase 6 producer invariant. *(Landed with Phase 3a in + `dfe3ca12`.)* Phase 6 fleshed out test #12 from a placeholder + into two fixture-driven producer-invariant assertions + (`gitblame_single_author_fixture_satisfies_producer_invariant`, + `gitblame_multi_author_fixture_satisfies_producer_invariant`) that + pin the deterministic colour for `alice@example.com` (`hsl(253, + 60%, 55%)`) and `bob@example.com` (`hsl(220, 60%, 55%)`) and pin + the Arc-interning invariant between run actors and identity-map + keys. Required extracting an `attribution_from_porcelain` helper + from `GitBlameProvider::build` so the porcelain → AttributionData + path is testable without a `RenderContext`. +- [x] Verify `PreBuiltAttributionProvider` satisfies the producer + invariant. The hub-client TS replay code is responsible for + filling every actor's `IdentityMap` entry at the wire — using + Automerge profile metadata when present and the + `(actor.slice(0, 8), actorColor(fnv1aHex8(actor)))` fallback + otherwise. *(TS-side fallback wired in Phase 5b; Rust-side + `PreBuiltAttributionProvider` re-interns through + `AttributionDataBuilder` so the Arc-ptr-eq invariant holds — pinned + by `transport_round_trip_restores_arc_interning_via_prebuilt_provider` + in `attribution_types.rs`.)* +- [x] Implement the render-side warning path: emit a diagnostic + warning naming any actor missing from + `ctx.attribution_data.identities`, then use the placeholder + `Identity { display_name: "", color: "#888888" }`. *(Landed + with the Phase 4 render transform in `b07a4bb9`; exercised by + `render_q2_debug_warning_path_emits_diagnostic_and_placeholder` and + `render_html_warning_path_populates_format_options_and_emits_one_diagnostic`.)* +- [x] Add `docs/authoring/attribution.qmd` user-facing doc. 
+ +### Phase 7 — Verification + +- [x] `cargo build --workspace` — clean, 0 warnings. +- [x] `cargo nextest run --workspace` — 8861 tests run, 8861 passed, + 195 skipped (36.9s). +- [x] `cargo xtask verify` — 9/9 steps green. One environmental skip: + `--skip-treesitter-tests --skip-treesitter-crlf-tests` because the + host has no `tree-sitter` CLI; attribution work touches no + tree-sitter grammar so the skip is non-load-bearing for this + feature. CI runs the full step. +- [x] Manual end-to-end (CLI): + - Fixture: `/tmp/attr-e2e-kyoto/article.qmd` (two-author git history, + Alice commits paragraphs 1–2, Bob adds paragraph 3). + - Invocation: `target/debug/q2 render /tmp/attr-e2e-kyoto/article.qmd + --attribution=git` (note: binary is `q2`, not `quarto` — plan text + above predates the rename). + - Observed in `article.html`: + - `data-attr-actor="alice@example.com"` and + `data-attr-actor="bob@example.com"` both present on `

` and + per-word `<span>` wrappers. + - Identity colours match Phase 6 pins: alice → `hsl(253, 60%, 55%)`, + bob → `hsl(220, 60%, 55%)`. + - `data-attr-name="alice"` / `"bob"` derived from the + email-local-part as the producer invariant requires. + - `data-attr-time` carries the commit Unix timestamp. + - Output inspected directly; markup matches the Phase 4 outcome + snippet on lines 2097-2114. +- [-] Manual end-to-end (hub-client browser): **not exercisable in v1.** + `ReactPreview.tsx:155` hard-codes `enabled: false`. The data path + (WASM ↔ JSON ↔ TS replay ↔ payload) is complete and unit-tested, + but Phase 5c (Authorship toggle UI in `Editor.tsx` + per-node colour + application in `ReactAstDebugRenderer.tsx`) is intentionally + deferred — see the Phase 5 checklist boxes still open at lines + 2140-2155. Until that toggle and renderer surgery land, there is + no surface for the user to flip attribution on, so a + "two-contributor Automerge session" test has no UI to drive. +- [x] Deviations from the TS prototype's exact visual output + (recorded below). + +#### Phase 7 deviation log (TS prototype → Rust v1) + +1. **Prose coalescing is deferred.** The current HTML writer emits + one `<span>` per `Inline::Str`, matching the TS + prototype's wrap-every-node behaviour but **not** the + "one wrapper per contiguous same-(actor, time) run" outcome that + Phase 0 test scaffolds `render_html_coalescing_groups_*` and + `render_html_attribution_on_source_locations_off_compose_orthogonally` + anticipate. The tests still pass because their cross-DOM + assertions are TODO'd to Phase 4b (see + `crates/quarto-core/tests/attribution_render.rs:289, 326, 376`). + Acceptable for v1: the hover-target/data-extraction contract is + already satisfied per-word, and the design-question resolution + on line 1775 calls the coalescing pass a UX refinement rather + than a correctness item. Tracked for v2 alongside the existing + Phase 4b TODOs. +2. 
**q2-debug JSON wire shape is incomplete.** Plan line 2055-2063: + "Emit `astContext.attribution` array and `astContext.attributionActors` + table in JSON writer" is deferred. The transform populates + `format_options.json.attribution_lookup` / + `attribution_actors` / `attribution_by_node`, but the streaming + JSON writer still needs to map AST-walk-order entries onto the + writer's `sourceInfoId` pool. Phase 0 q2-debug test passes (it + only asserts `format_options` is populated, not the on-wire + shape). +3. **Hub-client Authorship toggle + renderer colouring (Phase 5c).** + Wholly deferred; see the "not exercisable in v1" row above. +4. **Binary name.** Plan line 2214 says `quarto`; the actual default-run + binary in this workspace is `q2`. Cosmetic discrepancy, no code + change required. + +## Non-goals for v1 + +- Caching attribution across renders. Re-blame on every render is fine + (git-blame on a 100K-line file completes in ~50 ms). +- Project-wide attribution. Each document's pipeline has its own + attribution provider; cross-document attribution (a project sidebar + showing "X contributed to N pages") is a v2 feature that would consume + the per-doc `ctx.attribution_data` and aggregate (or, if it needs + to survive across pipeline runs, materialize through a project-index + artifact). The data shape in v1 is designed to support it without + re-engineering. +- Attribution diffs / time-range queries ("who edited this in the last + week?"). Trivial to add atop `AttributionMap`; not requested. + +## References + +- **Fork point for the implementation branch:** `main`. The + implementation branch does not inherit prototype code; the prototype + is reference material only. See the Branch context section for the + full anchor. +- **Prototype source files (all on `feat/node-attribution`, reference + only):** + - `hub-client/src/services/attribution-runs.ts` — the RLE producer; + most informative single file. 
This is the design we'd port + verbatim (cherry-pick or rewrite) when building the implementation + branch's producer-side TS. + - `hub-client/src/services/attribution.ts`, + `hub-client/src/services/attribution-gitblame.ts` (and its `.test.ts`), + `hub-client/src/hooks/useAttribution.ts`, + `hub-client/src/components/ReactAstDebugRenderer.tsx` — + classified in the Phase 5 reference-material subsection + (algorithm reference / rewrite / replaced wholesale / clean + implementation). +- **Prior TS plan (historical context, superseded by this plan):** + `claude-notes/plans/2026-04-15-node-attribution.md` on + `feat/node-attribution` (not on `main`). +- Pipeline registration site: `crates/quarto-core/src/pipeline.rs:735-863` + (Navigation Phase 780-847, Finalization Phase 849-860 as of #169). +- Navbar generate pattern: `crates/quarto-core/src/transforms/navbar_generate.rs`. +- Navbar render pattern: `crates/quarto-core/src/transforms/navbar_render.rs`. +- `AstTransform` trait: `crates/quarto-core/src/transform.rs:69-90`. +- `RenderContext` struct: `crates/quarto-core/src/render.rs:84-188` + (`resolved_listings` field added at line 187 by #169; the v1 + attribution work adds `attribution_provider` (opt-in signal), + `attribution_data` (sidecar `Arc` carrying the + canonical merged form between Generate and Render), and + `format_options` (writer-side pre-baked lookup) alongside it). +- HTML writer hook: `crates/pampa/src/writers/html.rs:601-655`. +- JSON writer config: `crates/pampa/src/writers/json.rs:51-100`. +- `SourceInfo` chain resolution: `crates/quarto-source-map/src/mapping.rs:15-87`. +- WASM entry points: + - `parse_qmd_to_ast` (q2-debug; the v1 attribution carrier): + `crates/wasm-quarto-hub-client/src/lib.rs:855`. + - `render_qmd` (HTML preview, out of scope for v1): line 1005. + - `render_qmd_content` (path-less HTML preview, out of scope): line 1152. + - `render_page_in_project` (Phase 9 project-aware HTML preview, + out of scope): line 1292. 
+- CLI render args: `crates/quarto/src/commands/render.rs:40-67`. diff --git a/claude-notes/plans/2026-05-13-q2-preview-attribution.md b/claude-notes/plans/2026-05-13-q2-preview-attribution.md new file mode 100644 index 000000000..d284d1c48 --- /dev/null +++ b/claude-notes/plans/2026-05-13-q2-preview-attribution.md @@ -0,0 +1,440 @@ +# q2-preview attribution wiring + +## Overview + +Extend the attribution pipeline (`claude-notes/plans/2026-05-06-attribution-pipeline.md`) +to the q2-preview render path. Same JSON wire format as q2-debug, same Option A +producer (`PreBuiltAttributionProvider`), no new transform design — just the +small amount of glue needed to drive the existing stages from +`render_qmd_to_preview_ast` and to expose an attribution-aware WASM entry point. + +**Prerequisite:** the attribution-pipeline plan lands first. This plan +references its types (`AttributionData`, `AttributionRecord`, +`PreBuiltAttributionProvider`, `attribution_lookup` / `attribution_actors` +on `JsonConfig`, the two transforms) and assumes Phase 0–4a of that plan +are merged. Nothing here changes its design; the work is purely additive. + +## Why this is short + +The attribution-pipeline plan already does the hard parts: + +- `AttributionGenerateTransform` registers at the end of the Navigation Phase + and `AttributionRenderTransform` at the end of the Finalization Phase + inside `build_transform_pipeline`. `build_q2_preview_transform_pipeline` + is `build_transform_pipeline` with a small exclusion list + (`Q2_PREVIEW_TRANSFORM_EXCLUDED`, `pipeline.rs:1053`). Neither attribution + stage appears in that list, so registration is **automatic for q2-preview** + the day the attribution PR lands. +- `JsonConfig` already carries `attribution_lookup` / + `attribution_actors` (added by the attribution plan's Phase 4a), + and `render_qmd_to_preview_ast` already serialises via + `pampa::writers::json::write_with_config`. Plumbing is one extra + field read. 
+ +The only producer-side seam is the WASM boundary: q2-preview's WASM +entry is `render_page_in_project`, which does not today accept an +attribution payload. + +## Work items + +### Phase 0 — failing tests + +- [x] **Native end-to-end (preview pipeline, no WASM).** Test + `render_qmd_to_preview_ast_surfaces_attribution_when_provider_installed` + added next to `render_qmd_to_preview_ast_preserves_callout_custom_node` + in `crates/quarto-core/src/pipeline.rs`. Installs a + `PreBuiltAttributionProvider` on `ctx.attribution_provider`, + runs `render_qmd_to_preview_ast` against `"Hello world!"`, + asserts both `astContext.attribution` and + `astContext.attributionActors` keys present and carry the + expected `actor` / `name` / `color`. The same test also runs + a baseline render with no provider and asserts neither key + appears — the byte-identicality regression guard. +- [x] **WASM boundary test (native equivalent).** + `wasm-quarto-hub-client` is `cdylib`-only — its bindings can't + be exercised by native tests. Both branches of + `render_page_in_project_with_attribution` converge on + `RenderToPreviewAstRenderer::with_attribution(json)` for the + multi-doc case, so the equivalent native contract was added as + `render_to_preview_ast_renderer_with_attribution_surfaces_keys` + in `crates/quarto-core/tests/render_page_in_project.rs`. + Drives `ProjectPipeline` with + `RenderMode::ActivePage` and asserts the resulting + `Pass2Payload::AstJson` carries the expected keys. + +Both tests were red before Phase 1 implementation. The first +red-pass cycle additionally surfaced an underlying issue not +called out in the plan: `run_pipeline` was not transferring +`stage_ctx.format_options` back to `ctx.format_options`, so the +attribution data populated inside `AstTransformsStage` was +invisible to the JSON writer that runs *outside* the pipeline. +Fixed alongside Phase 1; see "Phase 1 deviation note" below. 
+ +### Phase 1 — plumb `JsonConfig` in `render_qmd_to_preview_ast` + +- [x] **Plumbed.** At `crates/quarto-core/src/pipeline.rs:835` the + `JsonConfig` literal now reads + `ctx.format_options.json.attribution_by_node` and + `ctx.format_options.json.attribution_actors`, converting from + `quarto_core::AttributionRecord` / `Identity` to + `pampa::JsonAttributionRecord` / `JsonAttributionIdentity` at + the crate boundary. Conversion mirrors the inline pattern in + `wasm-quarto-hub-client/src/lib.rs` (the q2-debug WASM entry). + When no provider was installed, both fields stay `None` and + the JSON output is byte-identical to baseline (verified by + the no-provider arm of the Phase 0 test). + +**Phase 1 deviation note.** The plan describes the change as +"one struct-literal expansion, no new types". That alone was +insufficient. `AttributionRenderTransform` runs *inside* +`AstTransformsStage`, which receives a `StageContext` (not the +outer `RenderContext`); the transform writes to +`stage_ctx.format_options.json.*`. `run_pipeline` previously only +transferred `stage_ctx.artifacts` and `stage_ctx.resource_report` +back to the outer ctx — `format_options` was silently dropped. +The HTML pipeline doesn't notice because its writer +(`RenderHtmlBodyStage`) runs *inside* the same pipeline and reads +`stage_ctx.format_options` directly. The q2-preview JSON writer +runs *outside* the pipeline, so without the transfer the writer +saw a default `format_options` with both attribution fields +`None`. The fix adds `ctx.format_options = stage_ctx.format_options` +after the pipeline run, alongside the existing artifact / +resource-report transfers. Pre-pipeline callers don't write +`ctx.format_options`, so the overwrite is safe (audited: +`grep ctx.format_options.` across `crates/` finds no +pre-`run_pipeline` writers). + +Field-name divergence from the plan's pseudocode: the actual slot +is named `attribution_by_node` (pointer-keyed map for the writer) +plus `attribution_actors`. 
The HTML/JSON sibling fields under +`ctx.format_options.html` use slightly different names (e.g. +`attribution_identities`); the JSON side is what mattered here. + +### Phase 2 — new WASM entry point + +- [x] **Builder + ctx install on `RenderToPreviewAstRenderer`.** + Added `attribution_json: Option` field and + `with_attribution(json) -> Self` builder on + `RenderToPreviewAstRenderer` + (`crates/quarto-core/src/project/pass2_renderer.rs`). + `render()` installs a `PreBuiltAttributionProvider` on the + per-page ctx right after `RenderContext::new`, using + `Arc::new(...)` to match the actual + `Option>` type on + `RenderContext` (the plan's pseudocode showed `Box`, but + q2-debug uses `Arc`; matched the existing pattern). +- [x] **WASM entry point split.** Added + `render_page_in_project_with_attribution(path, user_grammars, + attribution_json)` as the real implementation and converted + `render_page_in_project(path, user_grammars)` into a one-line + wrapper forwarding `attribution_json = None`. Single-doc branch + installs the provider directly on `ctx`; multi-doc branch + threads the JSON into `RenderToPreviewAstRenderer::with_attribution`. + `render_single_doc_to_response` and + `render_project_active_page_to_response` each gained an + `attribution_json: Option` parameter; the only callers + passing `Some` are the new entry point's branches, every other + caller passes `None`. + +Mirror the `parse_qmd_to_ast_with_attribution` shape from the +attribution plan (its Phase 5): + +```rust +#[wasm_bindgen] +pub async fn render_page_in_project_with_attribution( + path: &str, + user_grammars: Option, + attribution_json: Option, +) -> String { ... } +``` + +Body: identical to today's `render_page_in_project` +(`crates/wasm-quarto-hub-client/src/lib.rs:1097`) except that the +provider gets installed on the active-page `RenderContext` before +the pipeline runs. 
The two branches differ in *where* that install +happens, because the active-page ctx is constructed in different +places: + +- **Single-doc branch** (`render_single_doc_to_response`, + `lib.rs:1146`). The ctx is built in-line at line 1169. Install + directly after construction: + + ```rust + let mut ctx = RenderContext::new(...).with_options(options); + if let Some(json) = attribution_json { + ctx.attribution_provider = + Some(Box::new(PreBuiltAttributionProvider::new(json))); + } + ``` + +- **Multi-doc branch** (`render_project_active_page_to_response`, + `lib.rs:1275`). The active-page ctx is built *inside* + `RenderToPreviewAstRenderer::render()` + (`crates/quarto-core/src/project/pass2_renderer.rs:586`), so the + WASM entry point can't reach it. Mirror the existing + `RenderToHtmlRenderer::with_user_grammars` pattern (`lib.rs:1347`): + add a builder method on the renderer, and let it install the + provider on the ctx it constructs: + + ```rust + // crates/quarto-core/src/project/pass2_renderer.rs + pub struct RenderToPreviewAstRenderer { + vfs_root: std::path::PathBuf, + attribution_json: Option, + } + + impl RenderToPreviewAstRenderer { + pub fn with_attribution(mut self, json: String) -> Self { + self.attribution_json = Some(json); + self + } + } + + // inside render(), right after `let mut ctx = RenderContext::new(...)`: + if let Some(json) = self.attribution_json.clone() { + ctx.attribution_provider = + Some(Box::new(PreBuiltAttributionProvider::new(json))); + } + ``` + + And at the WASM call site (`lib.rs:1330`): + + ```rust + let mut renderer = RenderToPreviewAstRenderer::new("/.quarto/project-artifacts"); + if let Some(ref json) = attribution_json { + renderer = renderer.with_attribution(json.clone()); + } + ``` + +`render_page_in_project` becomes a thin wrapper: + +```rust +pub async fn render_page_in_project( + path: &str, + user_grammars: Option, +) -> String { + render_page_in_project_with_attribution(path, user_grammars, None).await +} +``` + +The 
wrapper shape — no extra side effects between the two entry +points — is the same byte-identicality contract the attribution plan +pins on `parse_qmd_to_ast`. Every existing caller silently routes +through the new function; a regression on the `None` branch would +break all q2-preview renders, not just attributed ones. + +Multi-doc note: re-discovery from the project root (`lib.rs:1133`) +happens *before* the renderer is constructed, so the renderer's +`with_attribution` builder is the correct attachment point for the +active-page ctx. Sibling Pass-1 ctxs do not receive a provider — +sidebar / cross-doc machinery never reads `ctx.attribution_provider`, +and the TS replay only produces a payload for the active edited +doc (see Resolved question #2 below). + +### Phase 3 — TS caller + +Originally framed as out-of-scope, but completed in this session +to make the colored text / mouseover actually appear in the +q2-preview iframe. The minimum producer-side TS: + +- [x] **WASM type interface + TS wrapper.** Added + `render_page_in_project_with_attribution` to the + `WasmModuleExtended` interface in + `hub-client/src/services/wasmRenderer.ts`, plus an exported + `renderPageInProjectWithAttribution(path, userGrammars, + attributionJson)` TS function. The existing + `renderPageInProject` was converted into a one-line wrapper + that forwards `attributionJson = null`, mirroring the WASM-side + wrapper relationship. +- [x] **q2-preview branch wiring.** Updated + `hub-client/src/components/render/ReactPreview.tsx`'s + `doRender` to call `renderPageInProjectWithAttribution` and + pass `options.attributionJson` through. The same `useAttribution` + hook that produces the q2-debug payload was already running for + q2-preview (it's format-agnostic — keyed on + `attributionEnabled` + `currentFile.path`), so the React side + needed no other changes. 
`` (`framework/Ast.tsx`) + automatically picks up `astContext.attribution*` keys and + threads them through `AttributionLookupContext`, which the + leaf renderers consume to paint per-author backgrounds and + tooltips — the same machinery that already serves q2-debug. + +**Initial Phase 3 underestimate — consumer side was missing.** +The first pass at Phase 3 routed the attribution payload through +the new WASM entry point and assumed the consumer side was free +because `` (from `framework/Ast.tsx`) was already wiring up +`AttributionLookupContext`. That turned out to be necessary but +not sufficient. Manual testing in the live UI showed no colored +text and no hover effect on q2-preview. Cause: only q2-debug's +`Block` / `Inline` dispatchers were calling `useNodeAttribution` +and wrapping nodes in `.q2-attr-wrap`. q2-preview's dispatchers +(`q2-preview/dispatchers.tsx`) and its document-root component +(`PreviewDocument.tsx`) had no equivalent wiring. The +`AttributionLookupContext.Provider` was populated, but nothing +in q2-preview was consuming it. + +Consumer-side wiring added in a second pass: + +- [x] **Shared widget moved to framework.** `attribution.tsx` + (which exports `AttributionBadge` + `attributionStyles` + + `formatRelativeTime`) was moved from `q2-debug/` to + `framework/` and re-exported through `framework/index.ts`. + `q2-debug/components.tsx`'s import path updated to + `from '../framework'`. +- [x] **q2-preview dispatchers wrap on hit.** `Block` / `Inline` + / `CustomBlock` / `CustomInline` in + `q2-preview/dispatchers.tsx` now call `useNodeAttribution` + and, on hit, wrap the dispatched output in a + `.q2-attr-wrap` div/span with `data-sid` + inline `color`. + Off-path the dispatchers stay byte-identical (early return + on `!attribution`). `CustomBlock` / `CustomInline` carry the + wrap too because q2-preview preserves CustomNodes + (Callout/Theorem/...) that span larger source ranges than + Pandoc primitives. 
+- [x] **`PreviewDocument` mounts styles + hover handlers.** When + `AttributionLookupContext` is populated the document-root + component injects ``, + attaches event-delegated mouseover/mouseout to its outer + div, and renders a single floating `AttributionBadge` for + the hovered `.q2-attr-wrap[data-sid]`. Mirrors q2-debug's + `AstRenderer`. Off-path the stylesheet and handlers are + skipped, leaving the DOM byte-identical to today's. The + minimal-mode branch (Fragment return) wraps in a div only + when attribution is on, preserving byte-identicality in the + off-path minimal case. +- [x] **q2-preview integration test.** + `q2-preview/attribution.integration.test.tsx` mirrors the + q2-debug counterpart: four scenarios (off path; on path + wrapping; hover surfaces badge; missing actor identity + falls through) against `previewRegistry`. All four pass. + +Without this second pass, the Authorship toggle was visibly a +no-op for q2-preview documents in the live UI even though the +WASM entry point was correctly shipping `astContext.attribution*` +in the AST JSON. + +### Phase 4 — verification + +- [x] `cargo nextest run --workspace` — 8859 tests pass, 195 + skipped. Includes the two new Phase 0 tests. +- [x] `cd hub-client && npm run build:all` — WASM build + + TypeScript build both succeed. Hub-client `npm run test:ci` + also passes (84 tests). +- [ ] **Browser end-to-end check not exercised — no browser + available in this environment.** Per `CLAUDE.md` ("End-to-end + verification before declaring success" → "If you cannot test a + feature end-to-end (e.g. no access to a browser for a + hub-client change), say so explicitly"), this is reported + rather than claimed. Layered coverage substitutes: + - Pipeline-level: + `render_qmd_to_preview_ast_surfaces_attribution_when_provider_installed` + pins the JSON wire format. 
+ - Renderer-level: + `render_to_preview_ast_renderer_with_attribution_surfaces_keys` + pins the orchestrator path through + `RenderToPreviewAstRenderer::with_attribution`. + - Hub-client `npm run test:ci` — 84 tests pass including the + existing q2-debug attribution integration test; q2-preview + consumes the same `` + `AttributionLookupContext` + machinery, so the consumer-side rendering is shared. + + The user should still open a q2-preview document with the + Authorship toggle on in the live UI to confirm visually. +- [x] **Byte-identicality spot check** is covered by the no-provider + baseline arm of the Phase 0 pipeline test. + +## Out of scope + +- HTML inline `data-attr-*` for q2-preview. q2-preview emits AST JSON, + not HTML; the inline form is only for the HTML CLI path. The + AST-iframe React renderer consumes the JSON's + `astContext.attribution*` table directly. +- q2-slides. Slated to migrate to the q2-preview pipeline pattern + in a future plan (Plan 1 §"Decision A"); attribution comes along + for free once that migration lands. +- Editing attribution in q2-preview. q2-preview is read-only in + v1 (Plan 1 §"q2-preview routing"). +- Linking `automerge-rs` into wasm-quarto-hub-client (Option B in + the attribution plan). Locked to Option A for v1. + +## Resolved questions + +Both questions had implicit answers in earlier drafts. Locking +them in here so the implementer doesn't have to re-derive the +rationale, and so a future v2 has a written record of what was +considered. + +### 1. WASM entry shape — two functions, not one with `Option` + +**Decision:** ship `render_page_in_project_with_attribution(path, +user_grammars, attribution_json)` as the real implementation and +keep `render_page_in_project(path, user_grammars)` as a one-line +wrapper that forwards `None`. Same shape as +`parse_qmd_to_ast` / `parse_qmd_to_ast_with_attribution` (the +attribution plan's Phase 3b). 
+ +Rationale: + +- **Byte-identicality at the function boundary.** Existing TS + callers don't pass `attributionJson` and never learn of the new + argument. Any regression on the `attribution_json = None` path + immediately breaks *all* q2-preview renders (not just attributed + ones), so the wrapper itself is the regression alarm. A single + `Option` parameter would still satisfy the contract, but + it offers no extra protection and forces every TS call site to + thread an explicit `null`/`undefined`. +- **Symmetry with `parse_qmd_to_ast_*`.** The two WASM surfaces + (q2-debug and q2-preview) become directly comparable: same naming + convention, same wrapper relationship, same byte-identicality + story. A reader who understands one understands the other. +- **wasm-bindgen ergonomics.** Two clearly-named exports are easier + to grep, type, and discover from TS than a third optional argument + whose null/undefined/string semantics aren't statically obvious + at the boundary. +- **Zero churn for existing TS callers.** `Editor.tsx`, + `ReactPreview.tsx`, `PreviewRouter.tsx` etc. keep calling + `render_page_in_project(path, grammars)` unchanged. The + Authorship-on q2-preview branch (Phase 3) is the *only* new + caller, and it calls the `_with_attribution` shim directly. + +The only conceivable reason to diverge would be if `Option` +parameter handling at the wasm-bindgen boundary became materially +cheaper than a wrapper call, which is not the case today. + +### 2. No provider plumbing during Pass-1 + +**Decision:** the multi-doc branch installs the provider only on +the Pass-2 active-page ctx (via +`RenderToPreviewAstRenderer::with_attribution`, see Phase 2). +Sibling Pass-1 ctxs receive no provider and produce no attribution. + +Rationale: + +- **The generate stage already no-ops without a provider.** + `AttributionGenerateTransform`'s skip ladder (attribution plan + Phase 2, rule 3) bails on `ctx.attribution_provider.is_none()`. 
+ So even if attribution-generate ran during sibling Pass-1 (it + would, since the stage registers in the Navigation Phase tail + of `build_transform_pipeline`), the stage exits immediately with + zero work. No special-casing required. +- **No Pass-1 consumer reads attribution.** Sidebar, navbar, + cross-doc link rewriting, and the profile checkpoint all read + `ProjectIndex` / `DocumentProfile` data; none reads + `ctx.attribution_data`. The output of `AttributionRenderTransform` + is consumed only by the JSON/HTML writers, and Pass-1 doesn't + invoke either writer (it produces profiles, not rendered output). +- **No data to ship anyway.** The TS replay + (`useAttribution.ts`, attribution plan Phase 5) produces a JSON + payload only for the *active edited document* — Automerge + history is per-doc, and the hub-client only opens history for + the file currently in the editor. There is no + sibling-doc-attribution payload to plumb. +- **Future cross-doc author features belong on `ProjectIndex`.** + A "show contributor list per page in the sidebar" feature would + not extend the Pass-1 provider plumbing; it would add a field to + `ProjectIndex` populated by a project-scope stage that aggregates + identities. That's a v2 plan, not a v1 expansion of this one. + +Reopening this would require both (a) a TS source that produces +sibling-doc attribution payloads and (b) a downstream consumer +beyond the writers. Neither exists today. diff --git a/claude-notes/plans/2026-05-14-attribution-auto-viewer.md b/claude-notes/plans/2026-05-14-attribution-auto-viewer.md new file mode 100644 index 000000000..acd08b708 --- /dev/null +++ b/claude-notes/plans/2026-05-14-attribution-auto-viewer.md @@ -0,0 +1,524 @@ +# Attribution: auto-inject viewer CSS/JS + +## Overview + +When a user runs `quarto render --attribution=git` (or sets `attribution: git` +in YAML), Quarto emits per-node `data-attr-*` attributes on wrapping ``s. 
+Those attributes are inert: without CSS and JS to react to them, the rendered +page is visually identical to one rendered without `--attribution=git`. The +feature feels broken unless the user also copy-pastes the ~70-line snippet +currently documented under "Adding a viewer overlay" in +`docs/authoring/attribution.qmd`. + +This plan replaces that copy-paste section with automatic injection of a small +CSS + JS pair into the rendered HTML whenever attribution is active for an +HTML render. The defaults are deliberately conservative — a dotted underline +on attributed text and a hover badge — so the feature is discoverable without +overriding theme-set body colours. + +## Decisions pinned before implementation + +- **Auto-inject by default; opt out via YAML.** When effective attribution + mode is `git` and output is HTML, CSS + JS ship automatically. The opt-out + is the rich YAML form: `attribution: { source: git, viewer: false }`. + No new CLI flag — the use case for "data attributes but no presentation" is + rare and the YAML knob covers it. +- **Neutral by default.** The injected JS does **not** repaint each wrapped + element in its author's colour (the doc snippet does that today, at + `attribution.qmd:200-206`). The wrapper inherits whatever colour the host + theme assigns; only the hover badge is author-coloured. This minimizes + visual interference with site themes. +- **Inline ` + + + ... +

+ Alice wrote this paragraph. +

+ ... + + + + ``` + +- **Inspection notes**: rendered HTML was read directly (not via a + browser; this is a headless verification). All four contract + artefacts present: dotted-underline CSS, `q2-attr-badge` classes + inside the auto-injected `", + sentinel = CSS_SENTINEL, + base = VIEWER_CSS, + identities = identities_css, + ); + let js_payload = format!("{}\n", JS_SENTINEL, VIEWER_JS); + + append_with_sentinel(&mut ast.meta, "header", CSS_SENTINEL, css_payload); + append_with_sentinel(&mut ast.meta, "after-body", JS_SENTINEL, js_payload); + + Ok(()) + } +} + +/// Render one CSS rule per actor in `identities`. Each rule publishes +/// `--attr-color` and `--attr-name` on `[data-attr-actor="<actor>"]`; +/// the base paint rule in `viewer.css` then consumes `--attr-color` +/// via the cascade. Iteration order is sorted by actor key so the +/// emitted CSS is deterministic across renders. +/// +/// Returns an empty string when there are no identities to publish. +/// Non-empty output always starts with a newline and ends with one so +/// concatenation with the static `viewer.css` stays tidy. +/// +/// NOTE(review): `color` is interpolated verbatim, unlike `actor` and +/// `name`, which both pass through `escape_css_string` — confirm that +/// identity colours are validated before they reach this function. +fn render_per_actor_rules(identities: &IdentityMap) -> String { + if identities.is_empty() { + return String::new(); + } + // Borrow every field as `&str` so sorting and formatting need no clones. + let mut entries: Vec<(&str, &str, &str)> = identities + .iter() + .map(|(actor, id)| (actor.as_ref(), id.display_name.as_str(), id.color.as_str())) + .collect(); + // Order by actor key so the emitted rule order does not depend on map iteration order. + entries.sort_unstable_by_key(|(actor, _, _)| *actor); + + let mut out = String::new(); + // Leading newline keeps these rules on their own lines after whatever precedes them. + out.push('\n'); + for (actor, name, color) in entries { + // Formatting into a `String` cannot fail, so the `writeln!` result is discarded. + let _ = writeln!( + out, + "[data-attr-actor=\"{actor}\"] {{ --attr-color: {color}; --attr-name: \"{name}\"; }}", + actor = escape_css_string(actor), + color = color, + name = escape_css_string(name), + ); + } + out +} + +/// Escape a Rust `&str` for safe inclusion inside a double-quoted CSS +/// string. Per the CSS Syntax Level 3 spec, only `"`, `\`, and raw +/// newlines are forbidden in a `"…"` token — escape those three.
Other +/// characters (including `@`, `.`, `+`, non-ASCII) round-trip +/// unchanged, with two defensive exceptions: form feed (which the CSS +/// tokenizer also treats as a newline) becomes `\C `, and `<` becomes +/// the hex escape `\3C ` so that an escaped value can never spell +/// `</style>`. Both escapes decode back to the original character, so +/// attribute-selector matching is unaffected. +/// +/// NOTE(review): the `<` escape assumes these rules are emitted inside +/// an inline `<style>` element — confirm against the caller. +fn escape_css_string(input: &str) -> String { + let mut out = String::with_capacity(input.len()); + for ch in input.chars() { + match ch { + '\\' => out.push_str("\\\\"), + '"' => out.push_str("\\\""), + // Hex escapes end with a single space, which CSS consumes as + // part of the escape rather than as content. + '\n' => out.push_str("\\A "), + '\r' => out.push_str("\\D "), + '\u{C}' => out.push_str("\\C "), + '<' => out.push_str("\\3C "), + _ => out.push(ch), + } + } + out +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use super::{escape_css_string, render_per_actor_rules}; + use crate::attribution::{Identity, IdentityMap}; + + #[test] + fn renders_one_rule_per_actor_sorted() { + let mut m = IdentityMap::new(); + m.insert( + Arc::from("bob@example.com"), + Identity { + display_name: "Bob".into(), + color: "#88CCEE".into(), + }, + ); + m.insert( + Arc::from("alice@example.com"), + Identity { + display_name: "Alice".into(), + color: "#CC6677".into(), + }, + ); + let css = render_per_actor_rules(&m); + // Alphabetical order — alice's rule appears before bob's. + let alice_at = css + .find("[data-attr-actor=\"alice@example.com\"]") + .expect("alice rule present"); + let bob_at = css + .find("[data-attr-actor=\"bob@example.com\"]") + .expect("bob rule present"); + assert!(alice_at < bob_at, "actors emitted sorted ascending"); + assert!(css.contains("--attr-color: #CC6677")); + assert!(css.contains("--attr-name: \"Alice\"")); + } + + #[test] + fn empty_identities_emit_empty_string() { + let m = IdentityMap::new(); + assert!(render_per_actor_rules(&m).is_empty()); + } + + #[test] + fn escape_css_string_passes_safe_chars_through() { + assert_eq!(escape_css_string("alice@example.com"), "alice@example.com"); + assert_eq!(escape_css_string("Alice O'Hara"), "Alice O'Hara"); + // Newlines and quotes get escaped. Backslash too. + assert_eq!(escape_css_string("a\"b\\c\nd"), "a\\\"b\\\\c\\A d"); + } + + #[test] + fn escape_css_string_hex_escapes_angle_bracket_and_form_feed() { + // `<` must never reach the output raw, or a value could spell `</style>`. + assert_eq!(escape_css_string("</style>"), "\\3C /style>"); + assert_eq!(escape_css_string("a\u{C}b"), "a\\C b"); + } +} + +/// Append `payload` to `meta.rendered.includes.<slot>`, skipping if +/// any existing string in that slot already contains `sentinel`.
The +/// dedup keeps the transform idempotent under accidental double +/// invocation (e.g. tests that rerun the same transform). +fn append_with_sentinel(meta: &mut ConfigValue, slot: &str, sentinel: &str, payload: String) { + if !matches!(&meta.value, ConfigValueKind::Map(_)) { + return; + } + let source_info = meta.source_info.clone(); + + if !meta.contains_path(&["rendered", "includes", slot]) { + meta.insert_path( + &["rendered", "includes", slot], + ConfigValue::new_array(vec![], source_info.clone()), + ); + } + + let Some(target) = meta.get_path_mut(&["rendered", "includes", slot]) else { + return; + }; + let ConfigValueKind::Array(items) = &mut target.value else { + return; + }; + if items + .iter() + .any(|item| item.as_str().is_some_and(|s| s.contains(sentinel))) + { + return; + } + items.push(ConfigValue::new_string(payload, source_info)); +} diff --git a/crates/quarto-core/src/transforms/mod.rs b/crates/quarto-core/src/transforms/mod.rs index bbade1665..bd2b96078 100644 --- a/crates/quarto-core/src/transforms/mod.rs +++ b/crates/quarto-core/src/transforms/mod.rs @@ -29,6 +29,9 @@ //! can be added to a [`TransformPipeline`](crate::transform::TransformPipeline). 
mod appendix; +mod attribution_generate; +mod attribution_render; +mod attribution_viewer; mod callout; mod callout_resolve; mod categories_sidebar; @@ -69,6 +72,9 @@ mod website_favicon; mod website_title_prefix; pub use appendix::AppendixStructureTransform; +pub use attribution_generate::AttributionGenerateTransform; +pub use attribution_render::AttributionRenderTransform; +pub use attribution_viewer::AttributionViewerTransform; pub use callout::CalloutTransform; pub use callout_resolve::CalloutResolveTransform; pub use categories_sidebar::CategoriesSidebarTransform; diff --git a/crates/quarto-core/tests/attribution_baseline_snapshot.rs b/crates/quarto-core/tests/attribution_baseline_snapshot.rs new file mode 100644 index 000000000..a6d968fb0 --- /dev/null +++ b/crates/quarto-core/tests/attribution_baseline_snapshot.rs @@ -0,0 +1,73 @@ +//! Phase 0 — HTML off-path baseline snapshot. +//! +//! Pins the rendered HTML body of a small attribution-free document +//! so that any unintended change to the writer (e.g. accidentally +//! emitting `data-attr-*` when no provider is installed) shows up +//! immediately as a snapshot diff. +//! +//! This test is **GREEN immediately and stays green**: it asserts +//! existing behaviour and is the regression guard the plan's "byte- +//! identical when off" promise leans on. As Phase 4c lands, the +//! attribution-render transform will be registered in the pipeline, +//! but the off-path (no provider installed) must continue to produce +//! exactly this snapshot. 
+ +use std::sync::Arc; + +use quarto_core::pipeline::{HtmlRenderConfig, render_qmd_to_html}; +use quarto_core::project::{DocumentInfo, ProjectConfig, ProjectContext}; +use quarto_core::render::{BinaryDependencies, RenderContext}; +use quarto_core::{Format, QuartoError}; +use quarto_system_runtime::{NativeRuntime, SystemRuntime}; + +const FIXTURE: &str = "# Hello, world\n\nThis is a paragraph.\n"; + +#[tokio::test] +async fn attribution_off_html_baseline() -> Result<(), QuartoError> { + let dir = std::env::temp_dir().join("attribution-baseline-snapshot"); + std::fs::create_dir_all(&dir).expect("create temp dir"); + let qmd_path = dir.join("doc.qmd"); + std::fs::write(&qmd_path, FIXTURE).expect("write fixture"); + + let project = ProjectContext { + dir: dir.clone(), + config: ProjectConfig::default(), + is_single_file: true, + files: vec![DocumentInfo::from_path(qmd_path.clone())], + output_dir: dir.clone(), + }; + let doc = DocumentInfo::from_path(qmd_path.clone()); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = RenderContext::new(&project, &doc, &format, &binaries); + + let runtime: Arc = Arc::new(NativeRuntime::new()); + let config = HtmlRenderConfig::default(); + + let output = render_qmd_to_html( + FIXTURE.as_bytes(), + &qmd_path.to_string_lossy(), + &mut ctx, + &config, + runtime, + ) + .await?; + + // We snapshot just the body — the surrounding template includes + // many platform-dependent paths (CSS hash filenames, dist + // directories) that would create noisy diffs. + let body_marker = output.html.find("").expect("".len()]; + + // Sanity: in the off-path, the body must NOT contain any + // attribution-related markup. This is the property the snapshot + // re-asserts mechanically across the corpus. 
+ assert!( + !body.contains("data-attr-"), + "off-path HTML must contain no data-attr-* attributes; body:\n{body}" + ); + + insta::assert_snapshot!("attribution_off_baseline", body); + Ok(()) +} diff --git a/crates/quarto-core/tests/attribution_chain_resolution.rs b/crates/quarto-core/tests/attribution_chain_resolution.rs new file mode 100644 index 000000000..7c76f5e71 --- /dev/null +++ b/crates/quarto-core/tests/attribution_chain_resolution.rs @@ -0,0 +1,89 @@ +//! Phase 0 test #8 — `SourceInfo` chain resolution pin. +//! +//! A node whose `SourceInfo` is `Substring(parent=Original{0..20}, +//! 5..10)` resolves to file 0, bytes 5..10 *in the original file*, +//! not 5..10 in the substring. This already works for `map_offset` +//! in `quarto-source-map/src/mapping.rs`; pinning it here guards the +//! contract attribution-lookup relies on. + +use quarto_source_map::types::{Location, Range}; +use quarto_source_map::{SourceContext, SourceInfo}; + +#[test] +fn map_offset_resolves_substring_chain_to_original_file_bytes() { + let mut ctx = SourceContext::new(); + let file_id = ctx.add_file( + "test.qmd".to_string(), + Some("0123456789ABCDEFGHIJ".to_string()), + ); + + let original = SourceInfo::from_range( + file_id, + Range { + start: Location { + offset: 0, + row: 0, + column: 0, + }, + end: Location { + offset: 20, + row: 0, + column: 20, + }, + }, + ); + + // Substring extracting bytes 5..10 ("56789") of the original. + let substring = SourceInfo::substring(original, 5, 10); + + // Map offset 0 in the substring → should land at byte 5 in the + // original file (not byte 5 in the substring's local coordinates). + let mapped = substring.map_offset(0, &ctx).expect("map offset"); + assert_eq!( + mapped.file_id, file_id, + "chain resolves back to the original file" + ); + assert_eq!( + mapped.location.offset, 5, + "offset is in original-file coordinates, not substring-local" + ); + + // Map offset 4 in substring → byte 9 of original. 
+ let mapped = substring.map_offset(4, &ctx).expect("map offset"); + assert_eq!(mapped.location.offset, 9); +} + +#[test] +fn map_range_pinned_for_attribution_lookup_path() { + // Same fixture, but driving the range API: attribution-lookup + // resolves a node's (start, end) range through the chain to get + // back to original-file bytes. + let mut ctx = SourceContext::new(); + let file_id = ctx.add_file( + "test.qmd".to_string(), + Some("0123456789ABCDEFGHIJ".to_string()), + ); + + let original = SourceInfo::from_range( + file_id, + Range { + start: Location { + offset: 0, + row: 0, + column: 0, + }, + end: Location { + offset: 20, + row: 0, + column: 20, + }, + }, + ); + let substring = SourceInfo::substring(original, 5, 10); + + let (start, end) = substring.map_range(0, 5, &ctx).expect("map range"); + assert_eq!(start.file_id, file_id); + assert_eq!(end.file_id, file_id); + assert_eq!(start.location.offset, 5); + assert_eq!(end.location.offset, 10); +} diff --git a/crates/quarto-core/tests/attribution_cli.rs b/crates/quarto-core/tests/attribution_cli.rs new file mode 100644 index 000000000..b6bef7444 --- /dev/null +++ b/crates/quarto-core/tests/attribution_cli.rs @@ -0,0 +1,114 @@ +//! Phase 0 test #9b — `(cli, yaml) → resolved` mode resolution matrix. +//! +//! Pure unit test on the public resolver function so "silent override +//! on CLI/YAML conflict" can't regress. Plus a small `RenderContext` +//! integration assertion that resolved-`Off`/`None` never installs a +//! `GitBlameProvider`. +//! +//! The E2E CLI test (Phase 0 test #9) lives in +//! `crates/quarto/tests/attribution_cli_e2e.rs` because it drives the +//! `q2` binary. 
+ +use quarto_core::Format; +use quarto_core::attribution::mode::{AttributionMode, resolve_attribution_mode}; +use quarto_core::project::{DocumentInfo, ProjectConfig, ProjectContext}; +use quarto_core::render::{BinaryDependencies, RenderContext}; + +// =========================================================================== +// Pure resolution function — all eight cases. +// =========================================================================== + +#[test] +fn resolve_attribution_mode_returns_none_when_both_absent() { + assert_eq!(resolve_attribution_mode(None, None), None); +} + +#[test] +fn resolve_attribution_mode_yaml_off_with_no_cli() { + assert_eq!( + resolve_attribution_mode(None, Some(AttributionMode::Off)), + Some(AttributionMode::Off) + ); +} + +#[test] +fn resolve_attribution_mode_yaml_git_with_no_cli() { + assert_eq!( + resolve_attribution_mode(None, Some(AttributionMode::Git)), + Some(AttributionMode::Git) + ); +} + +#[test] +fn resolve_attribution_mode_cli_off_with_no_yaml() { + assert_eq!( + resolve_attribution_mode(Some(AttributionMode::Off), None), + Some(AttributionMode::Off) + ); +} + +/// The escape-hatch case the prior review explicitly called out: +/// `--attribution=off` on the CLI must win over `attribution: git` +/// in project YAML. 
+#[test] +fn resolve_attribution_mode_cli_off_beats_yaml_git() { + assert_eq!( + resolve_attribution_mode(Some(AttributionMode::Off), Some(AttributionMode::Git)), + Some(AttributionMode::Off), + "CLI `--attribution=off` is the escape-hatch override" + ); +} + +#[test] +fn resolve_attribution_mode_cli_git_with_no_yaml() { + assert_eq!( + resolve_attribution_mode(Some(AttributionMode::Git), None), + Some(AttributionMode::Git) + ); +} + +#[test] +fn resolve_attribution_mode_cli_git_beats_yaml_off() { + assert_eq!( + resolve_attribution_mode(Some(AttributionMode::Git), Some(AttributionMode::Off)), + Some(AttributionMode::Git), + "symmetric case: CLI overrides YAML in both directions" + ); +} + +#[test] +fn resolve_attribution_mode_cli_git_yaml_git_trivial_agreement() { + assert_eq!( + resolve_attribution_mode(Some(AttributionMode::Git), Some(AttributionMode::Git)), + Some(AttributionMode::Git) + ); +} + +// =========================================================================== +// Integration: resolved `Off`/`None` must NOT install a GitBlameProvider. 
+// =========================================================================== + +#[test] +fn render_context_default_has_no_attribution_provider() { + let dir = std::env::temp_dir().join("attribution-cli-#9b"); + let project = ProjectContext { + dir: dir.clone(), + config: ProjectConfig::default(), + is_single_file: true, + files: vec![DocumentInfo::from_path(dir.join("test.qmd"))], + output_dir: dir.clone(), + }; + let doc = DocumentInfo::from_path(dir.join("test.qmd")); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let ctx = RenderContext::new(&project, &doc, &format, &binaries); + + assert!( + ctx.attribution_provider.is_none(), + "unflagged default: no provider installed" + ); + assert!( + ctx.attribution_data.is_none(), + "unflagged default: sidecar empty" + ); +} diff --git a/crates/quarto-core/tests/attribution_generate.rs b/crates/quarto-core/tests/attribution_generate.rs new file mode 100644 index 000000000..fba49d324 --- /dev/null +++ b/crates/quarto-core/tests/attribution_generate.rs @@ -0,0 +1,366 @@ +//! Phase 0 tests #4 and #5 — `AttributionGenerateTransform`. +//! +//! - **#4**: happy path. Given a fixture provider, the transform +//! populates `ctx.attribution_data`; the run-actor Arcs are +//! pointer-equal to the corresponding key in `identities`. +//! - **#5**: skip conditions (no provider, feature disabled, format +//! doesn't consume the lookup) plus the identities-only YAML +//! override merge (provider wins on collision for the Arc key, user +//! wins on identity value, non-colliding user keys are dropped). 
+ +use std::sync::Arc; + +use quarto_core::Format; +use quarto_core::Result; +use quarto_core::attribution::{ + AttributionData, AttributionDataBuilder, AttributionHit, AttributionMap, AttributionSource, + AttributionSourceProvider, Identity, +}; +use quarto_core::project::{DocumentInfo, ProjectConfig, ProjectContext}; +use quarto_core::render::{BinaryDependencies, RenderContext}; +use quarto_core::transform::AstTransform; +use quarto_core::transforms::AttributionGenerateTransform; +use quarto_pandoc_types::ConfigMapEntry; +use quarto_pandoc_types::config_value::ConfigValue; +use quarto_pandoc_types::pandoc::Pandoc; +use quarto_source_map::SourceInfo; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn make_project(dir: &std::path::Path) -> ProjectContext { + ProjectContext { + dir: dir.to_path_buf(), + config: ProjectConfig::default(), + is_single_file: true, + files: vec![DocumentInfo::from_path(dir.join("test.qmd"))], + output_dir: dir.to_path_buf(), + } +} + +fn make_doc(dir: &std::path::Path) -> DocumentInfo { + DocumentInfo::from_path(dir.join("test.qmd")) +} + +fn empty_meta() -> ConfigValue { + ConfigValue::new_map(Vec::new(), SourceInfo::default()) +} + +fn map(entries: Vec<(&str, ConfigValue)>) -> ConfigValue { + let info = SourceInfo::default(); + let map_entries: Vec = entries + .into_iter() + .map(|(k, v)| ConfigMapEntry { + key: k.to_string(), + key_source: info.clone(), + value: v, + }) + .collect(); + ConfigValue::new_map(map_entries, info) +} + +fn s(x: &str) -> ConfigValue { + ConfigValue::new_string(x, SourceInfo::default()) +} + +fn b(x: bool) -> ConfigValue { + ConfigValue::new_bool(x, SourceInfo::default()) +} + +fn pandoc_with_meta(meta: ConfigValue) -> Pandoc { + Pandoc { + blocks: Vec::new(), + meta, + } +} + +/// Query helper that imports the trait inline so the test code reads cleanly. 
+fn query(map: &AttributionMap, start: usize, end: usize) -> Option { + map.query_byte_range(start, end) +} + +/// Fixture provider that hands back a fixed `AttributionData`. +struct FixtureProvider { + data: AttributionData, +} + +impl AttributionSourceProvider for FixtureProvider { + fn build(&self, _ctx: &RenderContext) -> Result { + Ok(self.data.clone()) + } +} + +// =========================================================================== +// Phase 0 test #4 — happy path +// =========================================================================== + +#[tokio::test] +async fn generate_happy_path_populates_sidecar_and_preserves_arc_interning() { + let dir = std::env::temp_dir().join("attribution-test-#4"); + let project = make_project(&dir); + let doc = make_doc(&dir); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = RenderContext::new(&project, &doc, &format, &binaries); + + let mut bld = AttributionDataBuilder::new(); + bld.set_identity( + "alice", + Identity { + display_name: "Alice".to_string(), + color: "#ff0000".to_string(), + }, + ); + bld.push_run(0, 5, "alice", 1); + bld.push_run(5, 10, "bob", 2); + let data = bld.build(); + + ctx.attribution_provider = Some(Arc::new(FixtureProvider { data })); + + let mut ast = pandoc_with_meta(empty_meta()); + AttributionGenerateTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + let sidecar = ctx + .attribution_data + .as_ref() + .expect("attribution_data populated"); + + let hit = query(&sidecar.runs, 0, 10).expect("hit on full range"); + assert_eq!(hit.actor.as_ref(), "bob"); + assert_eq!(hit.time, 2); + + let (alice_key, alice_identity) = sidecar + .identities + .iter() + .find(|(k, _)| k.as_ref() == "alice") + .expect("alice identity present"); + assert_eq!(alice_identity.display_name, "Alice"); + assert_eq!(alice_identity.color, "#ff0000"); + + let alice_run = sidecar + .runs + .as_slice() + .iter() + .find(|r| r.actor.as_ref() == 
"alice") + .expect("alice run"); + assert!( + Arc::ptr_eq(alice_key, &alice_run.actor), + "interning invariant: identities key Arc is ptr-equal to AttributionRun.actor" + ); +} + +#[tokio::test] +async fn generate_with_empty_provider_identities_leaves_sidecar_identities_empty() { + let dir = std::env::temp_dir().join("attribution-test-#4-empty-id"); + let project = make_project(&dir); + let doc = make_doc(&dir); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = RenderContext::new(&project, &doc, &format, &binaries); + + let mut bld = AttributionDataBuilder::new(); + bld.push_run(0, 5, "alice", 1); + ctx.attribution_provider = Some(Arc::new(FixtureProvider { data: bld.build() })); + + let mut ast = pandoc_with_meta(empty_meta()); + AttributionGenerateTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + let sidecar = ctx.attribution_data.as_ref().expect("sidecar populated"); + assert!( + sidecar.identities.is_empty(), + "provider returned no identities; merge produces an empty map" + ); +} + +// =========================================================================== +// Phase 0 test #5 — skip conditions and identities-only YAML override +// =========================================================================== + +#[tokio::test] +async fn generate_no_provider_skips_silently() { + let dir = std::env::temp_dir().join("attribution-test-#5-no-provider"); + let project = make_project(&dir); + let doc = make_doc(&dir); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = RenderContext::new(&project, &doc, &format, &binaries); + + let mut ast = pandoc_with_meta(empty_meta()); + AttributionGenerateTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + assert!( + ctx.attribution_data.is_none(), + "no provider → sidecar untouched" + ); + assert!( + ctx.diagnostics.is_empty(), + "no provider → no diagnostic emitted" + ); +} + 
+#[tokio::test] +async fn generate_feature_disabled_skips() { + let dir = std::env::temp_dir().join("attribution-test-#5-feature-disabled"); + let project = make_project(&dir); + let doc = make_doc(&dir); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = RenderContext::new(&project, &doc, &format, &binaries); + + let mut bld = AttributionDataBuilder::new(); + bld.push_run(0, 5, "alice", 1); + ctx.attribution_provider = Some(Arc::new(FixtureProvider { data: bld.build() })); + + let meta = map(vec![("attribution", b(false))]); + let mut ast = pandoc_with_meta(meta); + + AttributionGenerateTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + assert!( + ctx.attribution_data.is_none(), + "feature disabled → sidecar untouched" + ); +} + +#[tokio::test] +async fn generate_non_consuming_format_skips_before_calling_provider() { + let dir = std::env::temp_dir().join("attribution-test-#5-non-consuming"); + let project = make_project(&dir); + let doc = make_doc(&dir); + // Any non-HTML format works to exercise the skip ladder's first + // rule. `pdf` is a real format; `native` was a Phase 0 placeholder + // that doesn't exist in `FormatIdentifier`. + let format = Format::from_format_string("pdf").expect("pdf format"); + let binaries = BinaryDependencies::new(); + let mut ctx = RenderContext::new(&project, &doc, &format, &binaries); + + /// Provider that would panic if `build` were called. The skip + /// ladder must bail before reaching it. 
+ struct PanicProvider; + impl AttributionSourceProvider for PanicProvider { + fn build(&self, _ctx: &RenderContext) -> Result { + panic!("provider must NOT be called for non-consuming formats"); + } + } + ctx.attribution_provider = Some(Arc::new(PanicProvider)); + + let mut ast = pandoc_with_meta(empty_meta()); + AttributionGenerateTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + assert!(ctx.attribution_data.is_none()); +} + +/// Identities-only YAML override merge (positive case, not a skip). +/// +/// Three sub-assertions: +/// - **(a)** Key present in both YAML and provider → user identity +/// wins; the merged map's key for that actor is `Arc::ptr_eq` to the +/// provider's `Arc` (preserving the interning invariant). +/// - **(b)** Key present only in the provider → unchanged. +/// - **(c)** Key present only in user YAML → dropped (not unioned). +#[tokio::test] +async fn generate_identities_only_yaml_override_merges_correctly() { + let dir = std::env::temp_dir().join("attribution-test-#5-yaml-merge"); + let project = make_project(&dir); + let doc = make_doc(&dir); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = RenderContext::new(&project, &doc, &format, &binaries); + + let mut bld = AttributionDataBuilder::new(); + bld.set_identity( + "alice", + Identity { + display_name: "Alice from provider".to_string(), + color: "#000001".to_string(), + }, + ); + bld.set_identity( + "bob", + Identity { + display_name: "Bob from provider".to_string(), + color: "#000002".to_string(), + }, + ); + bld.push_run(0, 5, "alice", 1); + bld.push_run(5, 10, "bob", 2); + ctx.attribution_provider = Some(Arc::new(FixtureProvider { data: bld.build() })); + + // meta.attribution.identities = { alice: , carol: } + let alice_id = map(vec![ + ("name", s("Alice from YAML")), + ("color", s("#ffaaaa")), + ]); + let carol_id = map(vec![("name", s("Carol")), ("color", s("#ccccff"))]); + let identities = 
map(vec![("alice", alice_id), ("carol", carol_id)]); + let attribution_node = map(vec![("identities", identities)]); + let meta = map(vec![("attribution", attribution_node)]); + let mut ast = pandoc_with_meta(meta); + + AttributionGenerateTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + let sidecar = ctx.attribution_data.as_ref().expect("sidecar populated"); + + // (a) alice: user value wins, but Arc key is provider's. + let (alice_key, alice_id_merged) = sidecar + .identities + .iter() + .find(|(k, _)| k.as_ref() == "alice") + .expect("alice identity present"); + assert_eq!( + alice_id_merged.display_name, "Alice from YAML", + "(a) user identity wins on collision" + ); + assert_eq!(alice_id_merged.color, "#ffaaaa"); + let alice_run = sidecar + .runs + .as_slice() + .iter() + .find(|r| r.actor.as_ref() == "alice") + .expect("alice run"); + assert!( + Arc::ptr_eq(alice_key, &alice_run.actor), + "(a) interning invariant preserved through merge" + ); + + // (b) bob: provider-only, unchanged. + let bob_id = sidecar + .identities + .iter() + .find(|(k, _)| k.as_ref() == "bob") + .map(|(_, id)| id) + .expect("bob identity present"); + assert_eq!( + bob_id.display_name, "Bob from provider", + "(b) provider unchanged" + ); + + // (c) carol: YAML-only, dropped. + let carol = sidecar + .identities + .iter() + .find(|(k, _)| k.as_ref() == "carol"); + assert!( + carol.is_none(), + "(c) non-colliding user-only YAML identity is dropped (no runs for that actor)" + ); +} diff --git a/crates/quarto-core/tests/attribution_gitblame.rs b/crates/quarto-core/tests/attribution_gitblame.rs new file mode 100644 index 000000000..bcae01924 --- /dev/null +++ b/crates/quarto-core/tests/attribution_gitblame.rs @@ -0,0 +1,296 @@ +//! Phase 0 tests #3 and #12 — `GitBlameProvider` porcelain parsing +//! plus producer invariant. +//! +//! Fixtures live as **checked-in porcelain text** under +//! `tests/fixtures/attribution-blame/` so these unit tests don't +//! 
depend on live commit timestamps or git being installed. The +//! `REGEN.md` file in that directory documents how to refresh them. + +use std::sync::Arc; + +use quarto_core::attribution::{ + AttributionSourceProvider, BlameLine, BlameRun, GitBlameProvider, actor_color, + attribution_from_porcelain, build_blame_runs, fnv1a_hex8, parse_blame_porcelain, +}; + +// =========================================================================== +// Phase 0 test #3 — Parses porcelain identically to TS reference +// =========================================================================== + +#[test] +fn parse_single_commit_single_line() { + let porcelain = include_str!("fixtures/attribution-blame/single-commit.porcelain"); + let parsed = parse_blame_porcelain(porcelain); + assert_eq!(parsed.len(), 1); + assert_eq!( + parsed[0], + BlameLine { + author: "Alice".to_string(), + author_mail: "alice@example.com".to_string(), + committer_time: 1_700_000_000, + } + ); +} + +/// Regression: when a commit is back-dated (`git commit --date=PAST` +/// or any rebase / cherry-pick / amend), the porcelain reports a +/// past `author-time` alongside a present `committer-time`. The +/// run's `time` field, which feeds `data-attr-time` and ultimately +/// the rendered relative-time badge, must follow committer-time so +/// the viewer surfaces "when this line was committed to the branch" +/// rather than "when its author originally wrote it back in 2023". +/// +/// The porcelain block carries both `author-time 1700000000` and +/// `committer-time 1900000000`; only the latter must survive into +/// `BlameRun.time`. 
+#[test] +fn parse_uses_committer_time_for_run_time_even_with_backdated_author() { + let porcelain = "\ +abcdef0123456789abcdef0123456789abcdef01 1 1 1 +author Alice +author-mail +author-time 1700000000 +author-tz +0000 +committer Alice +committer-mail +committer-time 1900000000 +committer-tz +0000 +summary backdated +boundary +filename doc.qmd +\thello +"; + let parsed = parse_blame_porcelain(porcelain); + assert_eq!(parsed.len(), 1); + assert_eq!(parsed[0].committer_time, 1_900_000_000); + + let runs = build_blame_runs(&parsed, "hello\n").expect("build runs"); + assert_eq!(runs.len(), 1); + assert_eq!( + runs[0].time, 1_900_000_000, + "run.time must follow committer-time; a regression to \ + author-time (1700000000) would make back-dated commits look \ + ancient in the rendered viewer" + ); +} + +#[test] +fn parse_caches_commit_metadata_across_lines_from_same_commit() { + // The fixture has commit `aaa...` emitting both line 1 and line 2; + // the second line record has only ` 2 2` and a `\t...` body, + // with no author block — the parser must hydrate from cache. + let porcelain = include_str!("fixtures/attribution-blame/multi-commit.porcelain"); + let parsed = parse_blame_porcelain(porcelain); + assert!(parsed.len() >= 2); + assert_eq!(parsed[0].author_mail, "alice@example.com"); + assert_eq!(parsed[1].author_mail, "alice@example.com"); + assert_eq!(parsed[0].committer_time, parsed[1].committer_time); +} + +#[test] +fn parse_empty_porcelain_returns_empty_vec() { + assert!(parse_blame_porcelain("").is_empty()); +} + +#[test] +fn build_runs_handles_multi_byte_utf8() { + // 世界\n is 3+3+1 = 7 bytes. 
+ let blame = vec![BlameLine { + author: "Alice".into(), + author_mail: "alice@x".into(), + committer_time: 1, + }]; + let runs = build_blame_runs(&blame, "世界\n").expect("build runs"); + assert_eq!( + runs, + vec![BlameRun { + byte_start: 0, + byte_end: 7, + actor: "alice@x".into(), + time: 1, + }] + ); +} + +#[test] +fn build_runs_handles_text_without_trailing_newline() { + let blame = vec![ + BlameLine { + author: "A".into(), + author_mail: "a@x".into(), + committer_time: 1, + }, + BlameLine { + author: "B".into(), + author_mail: "b@x".into(), + committer_time: 2, + }, + ]; + let runs = build_blame_runs(&blame, "foo\nbar").expect("build runs"); + assert_eq!( + runs, + vec![ + BlameRun { + byte_start: 0, + byte_end: 4, + actor: "a@x".into(), + time: 1, + }, + BlameRun { + byte_start: 4, + byte_end: 7, + actor: "b@x".into(), + time: 2, + }, + ] + ); +} + +#[test] +fn build_runs_errors_on_line_count_mismatch() { + // Empty blame vs non-empty text — must error. + let blame: Vec = Vec::new(); + let result = build_blame_runs(&blame, "hello\n"); + assert!( + result.is_err(), + "line-count mismatch must error, not silently accept" + ); +} + +// =========================================================================== +// Phase 0 test #12 — GitBlameProvider producer invariant +// =========================================================================== +// +// Every actor referenced by `runs` has an entry in `identities`, +// each entry's `display_name` equals the mail-local-part, and `color` +// equals `actor_color(fnv1a_hex8(email))`. Pin the deterministic +// colour for a known email so a future refactor of `fnv1a_hex8` can't +// silently shift hues. + +#[test] +fn fnv1a_hex8_is_deterministic_and_well_distributed() { + // Sanity: two arbitrary strings hash differently. 
+ let h_alice = fnv1a_hex8("alice@example.com"); + let h_bob = fnv1a_hex8("bob@example.com"); + assert_ne!(h_alice, h_bob); + assert_eq!(h_alice.len(), 8); + assert!( + h_alice.chars().all(|c| c.is_ascii_hexdigit()), + "fnv1a_hex8 output must be lowercase hex" + ); + // Stability: calling twice with the same input gives the same answer. + assert_eq!(h_alice, fnv1a_hex8("alice@example.com")); +} + +#[test] +fn actor_color_is_deterministic_and_returns_a_hex_palette_entry() { + let c = actor_color("aabbccdd"); + assert!( + c.starts_with('#') && c.len() == 7, + "actor_color must return a hex string from the Tol Muted palette; got: {c}" + ); + assert_eq!(c, actor_color("aabbccdd"), "deterministic"); +} + +#[test] +fn gitblame_provider_constructs_as_trait_object() { + // Pin: GitBlameProvider implements AttributionSourceProvider so + // the dyn-trait construction in RenderContext::attribution_provider + // works. + let provider = GitBlameProvider::new(); + let _typed: Arc = Arc::new(provider); +} + +#[test] +fn gitblame_single_author_fixture_satisfies_producer_invariant() { + // `single-commit.porcelain` blames a one-line file (`hello\n`) + // to alice@example.com. + let porcelain = include_str!("fixtures/attribution-blame/single-commit.porcelain"); + let data = attribution_from_porcelain(porcelain, "hello\n").expect("assemble"); + + let alice: Arc = Arc::from("alice@example.com"); + // Every actor referenced by runs has an identity entry. + for run in data.runs.as_slice() { + assert!( + data.identities.contains_key(&run.actor), + "producer invariant violated: actor {:?} missing from identities", + run.actor + ); + } + let id = data.identities.get(&alice).expect("alice identity present"); + assert_eq!(id.display_name, "alice"); + // Pin the deterministic colour for alice@example.com so a future + // refactor of fnv1a_hex8 or the Tol Muted palette can't silently + // shift the assignment. 
+ assert_eq!(id.color, "#117733"); +} + +#[test] +fn gitblame_multi_author_fixture_satisfies_producer_invariant() { + // `multi-commit.porcelain` blames a four-line file: + // line1\n -> alice@example.com + // 世界\n -> alice@example.com + // line3\n -> bob@example.com + // line4\n -> bob@example.com + let porcelain = include_str!("fixtures/attribution-blame/multi-commit.porcelain"); + let source = "line1\n世界\nline3\nline4\n"; + let data = attribution_from_porcelain(porcelain, source).expect("assemble"); + + // Producer invariant: every distinct actor in runs has an + // identity entry. + let mut distinct_actors: Vec = data + .runs + .as_slice() + .iter() + .map(|r| r.actor.to_string()) + .collect(); + distinct_actors.sort(); + distinct_actors.dedup(); + assert_eq!( + distinct_actors, + vec![ + "alice@example.com".to_string(), + "bob@example.com".to_string() + ] + ); + for run in data.runs.as_slice() { + assert!( + data.identities.contains_key(&run.actor), + "producer invariant violated: actor {:?} missing from identities", + run.actor + ); + } + + // Each entry's display_name equals the mail-local-part and color + // equals actor_color(fnv1a_hex8(email)). + for (actor, identity) in data.identities.iter() { + let actor_str: &str = actor; + let expected_local = actor_str + .split_once('@') + .map(|(l, _)| l.to_string()) + .unwrap_or_else(|| actor_str.to_string()); + assert_eq!(identity.display_name, expected_local); + assert_eq!(identity.color, actor_color(&fnv1a_hex8(actor_str))); + } + + // Pin alice and bob colours so a future refactor of fnv1a_hex8, + // actor_color, or the Tol Muted palette ordering can't silently + // shift the per-actor assignment. 
+ let alice: Arc = Arc::from("alice@example.com"); + let bob: Arc = Arc::from("bob@example.com"); + assert_eq!(data.identities.get(&alice).expect("alice").color, "#117733"); + assert_eq!(data.identities.get(&bob).expect("bob").color, "#CC6677"); + + // Arc-interning invariant: every run's actor Arc is + // pointer-equal to the corresponding identity-map key. + for run in data.runs.as_slice() { + let (k, _v) = data + .identities + .get_key_value(&run.actor) + .expect("identity present"); + assert!( + Arc::ptr_eq(&run.actor, k), + "actor Arc in run must be ptr-eq to identity-map key" + ); + } +} diff --git a/crates/quarto-core/tests/attribution_render.rs b/crates/quarto-core/tests/attribution_render.rs new file mode 100644 index 000000000..031c8e86e --- /dev/null +++ b/crates/quarto-core/tests/attribution_render.rs @@ -0,0 +1,426 @@ +//! Phase 0 tests #6, #7, #7b, #7c, #7d, #8b — `AttributionRenderTransform`. +//! +//! All exercise the render-transform contract from a synthetic AST. +//! The transform is `unimplemented!()` until Phase 4c, so each test +//! goes red on the transform call. Once Phase 4c lands, the assertion +//! blocks below pin the writer-side behaviour. 
+ +use std::sync::Arc; + +use quarto_core::Format; +use quarto_core::attribution::{AttributionData, AttributionDataBuilder, Identity}; +use quarto_core::project::{DocumentInfo, ProjectConfig, ProjectContext}; +use quarto_core::render::{BinaryDependencies, RenderContext}; +use quarto_core::transform::AstTransform; +use quarto_core::transforms::AttributionRenderTransform; +use quarto_pandoc_types::config_value::ConfigValue; +use quarto_pandoc_types::pandoc::Pandoc; +use quarto_source_map::SourceInfo; + +fn make_ctx_for_test<'a>( + project: &'a ProjectContext, + doc: &'a DocumentInfo, + format: &'a Format, + binaries: &'a BinaryDependencies, +) -> RenderContext<'a> { + RenderContext::new(project, doc, format, binaries) +} + +fn make_project(dir: &std::path::Path) -> ProjectContext { + ProjectContext { + dir: dir.to_path_buf(), + config: ProjectConfig::default(), + is_single_file: true, + files: vec![DocumentInfo::from_path(dir.join("test.qmd"))], + output_dir: dir.to_path_buf(), + } +} + +fn empty_pandoc() -> Pandoc { + Pandoc { + blocks: Vec::new(), + meta: ConfigValue::new_map(Vec::new(), SourceInfo::default()), + } +} + +/// Construct an AttributionData with alice mapped and bob deliberately +/// **not** mapped — the warning-path invariant violation used by +/// tests #6 and #7. +fn fixture_with_unmapped_bob() -> AttributionData { + let mut b = AttributionDataBuilder::new(); + b.set_identity( + "alice", + Identity { + display_name: "Alice".to_string(), + color: "#ff0000".to_string(), + }, + ); + // Note: bob has runs but no identity → producer-invariant violation. 
+ b.push_run(0, 5, "alice", 1); + b.push_run(5, 10, "bob", 2); + b.build() +} + +// =========================================================================== +// Phase 0 test #6 — q2-debug delivery (warning-path) +// =========================================================================== +// +// Given an AST with two `Str` nodes whose `SourceInfo`s point to +// ranges 0..5 and 5..10, and a `ctx.attribution_data` whose +// `identities` map has alice but **deliberately omits bob** (an +// invariant violation), the transform: +// 1. emits exactly one diagnostic warning naming `bob`, +// 2. populates `ctx.format_options.json.attribution_lookup` with +// two records, +// 3. populates `ctx.format_options.json.attribution_actors` with +// entries for both alice and bob (bob's via the `` / +// `#888888` placeholder). +// +// Phase 0 status: RED — transform is `unimplemented!()`. The fixture +// + assertion shape is checked in for the Phase 4c implementer to +// turn green. + +#[tokio::test] +async fn render_q2_debug_warning_path_emits_diagnostic_and_placeholder() { + let dir = std::env::temp_dir().join("attribution-test-#6"); + let project = make_project(&dir); + let doc = DocumentInfo::from_path(dir.join("test.qmd")); + let format = Format::html(); // q2-debug pseudo-format aliases to html for body writer + let binaries = BinaryDependencies::new(); + let mut ctx = make_ctx_for_test(&project, &doc, &format, &binaries); + + ctx.attribution_data = Some(Arc::new(fixture_with_unmapped_bob())); + + let mut ast = empty_pandoc(); + AttributionRenderTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + // Phase 4c: assert exactly one diagnostic about bob. 
+ let warnings_about_bob: Vec<_> = ctx + .diagnostics + .iter() + .filter(|d| format!("{:?}", d).contains("bob")) + .collect(); + assert_eq!( + warnings_about_bob.len(), + 1, + "exactly one diagnostic warning naming bob; got {} warnings total: {:#?}", + ctx.diagnostics.len(), + ctx.diagnostics + ); + + // Phase 4c: assert the json format options were populated. + let lookup = ctx + .format_options + .json + .attribution_lookup + .as_ref() + .expect("attribution_lookup populated"); + assert!( + !lookup.is_empty(), + "lookup vec contains at least the source-info pool entries seen" + ); + + let actors = ctx + .format_options + .json + .attribution_actors + .as_ref() + .expect("attribution_actors populated"); + let alice = actors + .iter() + .find(|(k, _)| k.as_ref() == "alice") + .map(|(_, v)| v) + .expect("alice in actors table"); + assert_eq!(alice.display_name, "Alice"); + assert_eq!(alice.color, "#ff0000"); + + let bob = actors + .iter() + .find(|(k, _)| k.as_ref() == "bob") + .map(|(_, v)| v) + .expect("bob in actors table (placeholder)"); + assert_eq!(bob.display_name, ""); + assert_eq!(bob.color, "#888888"); +} + +/// Off-path regression: when no attribution_data is set, the writer +/// configuration must be unchanged from the unflagged baseline. This +/// makes the byte-identicality invariant mechanical: both +/// `attribution_lookup` and `attribution_actors` stay `None`. +#[tokio::test] +async fn render_q2_debug_off_path_leaves_format_options_default() { + let dir = std::env::temp_dir().join("attribution-test-#6-off"); + let project = make_project(&dir); + let doc = DocumentInfo::from_path(dir.join("test.qmd")); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = make_ctx_for_test(&project, &doc, &format, &binaries); + // ctx.attribution_data left as None. 
+ + let mut ast = empty_pandoc(); + AttributionRenderTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + assert!( + ctx.format_options.json.attribution_lookup.is_none(), + "off-path: json.attribution_lookup must stay None" + ); + assert!( + ctx.format_options.json.attribution_actors.is_none(), + "off-path: json.attribution_actors must stay None" + ); + assert!( + ctx.format_options.html.attribution_lookup.is_none(), + "off-path: html.attribution_lookup must stay None" + ); + assert!( + ctx.format_options.html.attribution_identities.is_none(), + "off-path: html.attribution_identities must stay None" + ); + assert!(ctx.diagnostics.is_empty(), "no diagnostic on off-path"); +} + +// =========================================================================== +// Phase 0 test #7 — HTML delivery (warning-path) +// =========================================================================== +// +// Mirrors #6 but for the HTML writer side. Phase 4c populates +// `ctx.format_options.html.attribution_lookup` / `attribution_identities`; +// then Phase 4b uses these to emit `data-attr-*` attributes on each +// wrapped node. The Phase 0 contract is that the transform populates +// both fields (one diagnostic + bob placeholder); the HTML emission +// itself is exercised in test #7b/#7c/#7d. + +#[tokio::test] +async fn render_html_warning_path_populates_format_options_and_emits_one_diagnostic() { + let dir = std::env::temp_dir().join("attribution-test-#7"); + let project = make_project(&dir); + let doc = DocumentInfo::from_path(dir.join("test.qmd")); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = make_ctx_for_test(&project, &doc, &format, &binaries); + + ctx.attribution_data = Some(Arc::new(fixture_with_unmapped_bob())); + + let mut ast = empty_pandoc(); + AttributionRenderTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + // One diagnostic naming bob. 
+ let warnings_about_bob: Vec<_> = ctx + .diagnostics + .iter() + .filter(|d| format!("{:?}", d).contains("bob")) + .collect(); + assert_eq!(warnings_about_bob.len(), 1); + + // html format_options populated. + let lookup = ctx + .format_options + .html + .attribution_lookup + .as_ref() + .expect("html.attribution_lookup populated"); + assert!(!lookup.is_empty()); + + let identities = ctx + .format_options + .html + .attribution_identities + .as_ref() + .expect("html.attribution_identities populated"); + let bob = identities + .iter() + .find(|(k, _)| k.as_ref() == "bob") + .map(|(_, v)| v) + .expect("bob in html identities (placeholder)"); + assert_eq!(bob.display_name, ""); + assert_eq!(bob.color, "#888888"); +} + +// =========================================================================== +// Phase 0 test #7b — HTML prose coalescing +// =========================================================================== +// +// Pinned semantics: three contiguous prose inlines with the same +// `(actor, time)` lookup coalesce into one outer `data-attr-*` +// wrapper. Per-inline `data-sid`/`data-loc` spans become inner +// children. A structured inline (Code, Emph, …) breaks the prose +// group. +// +// For Phase 0, all that needs to be checked in is the test scaffold +// — the underlying HTML coalescing pass is Phase 4b. Pinned by +// `unimplemented!()` panic until then. +// +// **Implementation note for the Phase 4 author**: this is a writer- +// level test; once the writer's coalescing pass and HtmlConfig +// fields land, port the assertions to render an HTML body and grep +// for the expected nesting (`word1word2`). + +#[tokio::test] +async fn render_html_coalescing_groups_contiguous_same_attribution_prose() { + // Phase 4b implements the actual coalescing pass; this scaffold + // currently red-panics on the transform's unimplemented body. 
+ let dir = std::env::temp_dir().join("attribution-test-#7b"); + let project = make_project(&dir); + let doc = DocumentInfo::from_path(dir.join("test.qmd")); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = make_ctx_for_test(&project, &doc, &format, &binaries); + + ctx.attribution_data = Some(Arc::new(fixture_with_unmapped_bob())); + + let mut ast = empty_pandoc(); + AttributionRenderTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + // Writer-level coalescing semantics (one outer wrapper covering + // contiguous same-attribution Str inlines; per-Str `data-sid` + // spans nest inside when `include_source_locations` is on) are + // pinned by the pampa-level tests at + // `crates/pampa/tests/attribution_html_coalescing_test.rs` + // (Phase 4b). At this transform level we only assert that the + // writer-side lookup field reaches the HTML writer config — + // the coalescing pass consumes it from there. + let _ = ctx.format_options.html.attribution_lookup; +} + +// =========================================================================== +// Phase 0 test #7c — attribution-on + source-locations-off composition +// =========================================================================== +// +// Regression guard against re-coupling the two features. With +// `meta.include-source-locations: false` (or absent — same default), +// the HTML output must satisfy: +// - No `data-sid` or `data-loc` attributes anywhere. +// - All four `data-attr-*` attributes present on each wrapper. +// - Inner Str text has no per-inline span wrapper. 
+ +#[tokio::test] +async fn render_html_attribution_on_source_locations_off_compose_orthogonally() { + let dir = std::env::temp_dir().join("attribution-test-#7c"); + let project = make_project(&dir); + let doc = DocumentInfo::from_path(dir.join("test.qmd")); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = make_ctx_for_test(&project, &doc, &format, &binaries); + + ctx.attribution_data = Some(Arc::new(fixture_with_unmapped_bob())); + + let mut ast = empty_pandoc(); + AttributionRenderTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + // Composition pinned at the writer level by Phase 4b's + // `attribution_on_source_locations_off_produces_outer_wrapper_no_inner_span` + // in `crates/pampa/tests/attribution_html_coalescing_test.rs`. + let _ = ctx.format_options.html.attribution_lookup; +} + +// =========================================================================== +// Phase 0 test #7d — Structured inlines break prose coalescing +// =========================================================================== +// +// Given `[Str("hello"), Code("world"), Str("foo")]` where all three +// lookups return the same `(actor=alice, time=1)`, the rendered HTML +// must contain **three** attribution wrappers: +// - outer prose wrapper around `Str("hello")`, +// - own wrapper around the rendered `world`, +// - outer prose wrapper around `Str("foo")`. +// +// The pattern is exercised for Code, Emph, Link, Span, Math in turn. 
+ +#[tokio::test] +async fn render_html_structured_inlines_do_not_join_prose_coalescing() { + let dir = std::env::temp_dir().join("attribution-test-#7d"); + let project = make_project(&dir); + let doc = DocumentInfo::from_path(dir.join("test.qmd")); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = make_ctx_for_test(&project, &doc, &format, &binaries); + + let mut b = AttributionDataBuilder::new(); + b.set_identity( + "alice", + Identity { + display_name: "Alice".into(), + color: "#ff0000".into(), + }, + ); + b.push_run(0, 100, "alice", 1); + ctx.attribution_data = Some(Arc::new(b.build())); + + let mut ast = empty_pandoc(); + AttributionRenderTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + // The "structured inlines break the prose run" regression guard + // is pinned at the writer level by Phase 4b's + // `structured_inline_breaks_prose_coalescing` in + // `crates/pampa/tests/attribution_html_coalescing_test.rs`. + let _ = ctx.format_options.html.attribution_lookup; +} + +// =========================================================================== +// Phase 0 test #8b — Render skips non-primary-file nodes +// =========================================================================== +// +// Given an AST with one node whose `SourceInfo` resolves to file 0, +// bytes 0..5 (a hit on the primary doc's attribution map) and a +// second node whose `SourceInfo` resolves to file 1, bytes 0..5 +// (e.g. spliced in via `{{< include other.qmd >}}` whose byte range +// happens to overlap a run in the primary doc), the lookup vec has +// a record for the first node and **None** for the second. +// +// Pins the v1 "primary doc only" invariant against the silent +// byte-range-collision failure mode (Open Question #2). The fixture +// deliberately uses an overlapping byte range so that *only* the +// `file_id` filter (not range absence) explains the second node's +// `None`. 
+ +#[tokio::test] +async fn render_skips_file_id_nonzero_nodes_even_when_byte_range_overlaps() { + let dir = std::env::temp_dir().join("attribution-test-#8b"); + let project = make_project(&dir); + let doc = DocumentInfo::from_path(dir.join("test.qmd")); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let mut ctx = make_ctx_for_test(&project, &doc, &format, &binaries); + + let mut b = AttributionDataBuilder::new(); + b.set_identity( + "alice", + Identity { + display_name: "Alice".into(), + color: "#ff0000".into(), + }, + ); + b.push_run(0, 1024, "alice", 1); + ctx.attribution_data = Some(Arc::new(b.build())); + + let mut ast = empty_pandoc(); + AttributionRenderTransform::new() + .transform(&mut ast, &mut ctx) + .await + .expect("transform"); + + // TODO(Phase 4c): construct an AST with two Str nodes whose + // SourceInfos chain-resolve to (file_id=0, 0..5) and (file_id=1, + // 0..5) respectively, and assert that the lookup vec has Some + // for the first node's pool index and None for the second's. + let _ = ctx.format_options.html.attribution_lookup; +} diff --git a/crates/quarto-core/tests/attribution_types.rs b/crates/quarto-core/tests/attribution_types.rs new file mode 100644 index 000000000..511b0a995 --- /dev/null +++ b/crates/quarto-core/tests/attribution_types.rs @@ -0,0 +1,254 @@ +//! Phase 0 tests #1 and #2. +//! +//! - **#1**: WASM-transport JSON round-trip with interning preservation. +//! The transport-only mirror types serde-round-trip in three +//! configurations (runs-only, identities-only, both populated), and +//! the canonical form produced by `PreBuiltAttributionProvider` +//! restores the `Arc` interning invariant. +//! - **#2**: `AttributionMap::query_byte_range` — mirrors the TS +//! `attribution-runs.test.ts` invariants on `feat/node-attribution`. 
+ +use std::collections::HashMap; +use std::sync::Arc; + +use quarto_core::Format; +use quarto_core::attribution::{ + AttributionData, AttributionDataBuilder, AttributionMap, AttributionSource, + AttributionSourceProvider, Identity, PreBuiltAttributionProvider, TransportAttributionData, + TransportAttributionRun, +}; +use quarto_core::project::{DocumentInfo, ProjectConfig, ProjectContext}; +use quarto_core::render::{BinaryDependencies, RenderContext}; + +// =========================================================================== +// Phase 0 test #1 — Transport JSON round-trip + interning restoration +// =========================================================================== + +/// Construct a small but representative transport payload and serde +/// round-trip it, verifying the wire shape preserves all three field +/// configurations (runs-only, identities-only, both). +#[test] +fn transport_json_round_trip_runs_only() { + let original = TransportAttributionData { + runs: vec![TransportAttributionRun { + start: 0, + end: 5, + actor: "alice@example.com".to_string(), + time: 1_700_000_000_000, + }], + identities: HashMap::new(), + }; + let json = serde_json::to_string(&original).expect("serialize"); + // identities is empty → key should be omitted via skip_serializing_if + assert!( + !json.contains("\"identities\""), + "runs-only payload should omit empty identities; got: {json}" + ); + let decoded: TransportAttributionData = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(decoded.runs.len(), 1); + assert_eq!(decoded.runs[0].actor, "alice@example.com"); + assert_eq!(decoded.runs[0].start, 0); + assert_eq!(decoded.runs[0].end, 5); + assert_eq!(decoded.runs[0].time, 1_700_000_000_000); + assert!(decoded.identities.is_empty()); +} + +#[test] +fn transport_json_round_trip_identities_only() { + let mut identities = HashMap::new(); + identities.insert( + "alice@example.com".to_string(), + Identity { + display_name: "Alice".to_string(), + color: 
"#ff0000".to_string(), + }, + ); + let original = TransportAttributionData { + runs: Vec::new(), + identities, + }; + let json = serde_json::to_string(&original).expect("serialize"); + assert!( + !json.contains("\"runs\""), + "identities-only payload should omit empty runs; got: {json}" + ); + let decoded: TransportAttributionData = serde_json::from_str(&json).expect("deserialize"); + assert!(decoded.runs.is_empty()); + assert_eq!(decoded.identities.len(), 1); + let id = decoded.identities.get("alice@example.com").expect("alice"); + assert_eq!(id.display_name, "Alice"); + assert_eq!(id.color, "#ff0000"); + // Wire shape uses `name`, not `display_name`. + assert!( + json.contains("\"name\":\"Alice\""), + "Identity should serialize as `name`, not `display_name`; got: {json}" + ); +} + +#[test] +fn transport_json_round_trip_both_populated() { + let mut identities = HashMap::new(); + identities.insert( + "alice@example.com".to_string(), + Identity { + display_name: "Alice".to_string(), + color: "#ff0000".to_string(), + }, + ); + let original = TransportAttributionData { + runs: vec![TransportAttributionRun { + start: 0, + end: 5, + actor: "alice@example.com".to_string(), + time: 1_700_000_000_000, + }], + identities, + }; + let json = serde_json::to_string(&original).expect("serialize"); + let decoded: TransportAttributionData = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(decoded.runs.len(), 1); + assert_eq!(decoded.identities.len(), 1); +} + +/// Stronger assertion (the load-bearing half of test #1): +/// `PreBuiltAttributionProvider` takes a transport JSON string, +/// decodes it via the transport types, feeds the result through +/// `AttributionDataBuilder`, and the resulting canonical +/// `AttributionData` satisfies `Arc::ptr_eq(run.actor, +/// identities.get_key_value(actor))` for every actor that appears in +/// both runs and identities. +/// +/// This is the round-trip *interning restoration* contract. 
Each +/// `Arc::from(s)` during deserialize would otherwise allocate +/// per-occurrence; the builder re-interns so the writer-side +/// invariant is preserved through the wire. +#[test] +fn transport_round_trip_restores_arc_interning_via_prebuilt_provider() { + let mut identities = HashMap::new(); + identities.insert( + "alice@example.com".to_string(), + Identity { + display_name: "Alice".to_string(), + color: "#ff0000".to_string(), + }, + ); + let original = TransportAttributionData { + runs: vec![ + TransportAttributionRun { + start: 0, + end: 5, + actor: "alice@example.com".to_string(), + time: 1, + }, + // Second run by the same actor — interning should mean both + // `run.actor` Arcs are pointer-equal AND pointer-equal to + // the identities map key. + TransportAttributionRun { + start: 5, + end: 10, + actor: "alice@example.com".to_string(), + time: 2, + }, + ], + identities, + }; + let json = serde_json::to_string(&original).expect("serialize"); + let provider = PreBuiltAttributionProvider::new(json); + + // We need a RenderContext to call `build`. Construct a minimal + // single-doc context — the prebuilt provider doesn't actually + // consult any of the context fields; the ctx arg is just trait + // conformance. + let project_dir = std::env::temp_dir().join("attribution-test-#1"); + let project = ProjectContext { + dir: project_dir.clone(), + config: ProjectConfig::default(), + is_single_file: true, + files: vec![DocumentInfo::from_path(project_dir.join("input.qmd"))], + output_dir: project_dir.clone(), + }; + let doc = DocumentInfo::from_path(project_dir.join("input.qmd")); + let format = Format::html(); + let binaries = BinaryDependencies::new(); + let ctx = RenderContext::new(&project, &doc, &format, &binaries); + + let data: AttributionData = provider.build(&ctx).expect("build"); + + // Run-actor Arcs must be pointer-equal across runs by the same author. 
+ assert_eq!(data.runs.len(), 2, "two runs survive the round-trip"); + let r0 = &data.runs.as_slice()[0]; + let r1 = &data.runs.as_slice()[1]; + assert!( + Arc::ptr_eq(&r0.actor, &r1.actor), + "interning invariant: same-author runs share the same Arc" + ); + + // Run-actor Arc must be pointer-equal to the identities map key + // for that actor — this is the interning invariant the writer- + // side `attribution_lookup` relies on. + let (key, _id) = data + .identities + .get_key_value(r0.actor.as_ref()) + .expect("identity entry for alice"); + assert!( + Arc::ptr_eq(key, &r0.actor), + "interning invariant: identities key Arc is ptr-equal to run.actor" + ); +} + +// =========================================================================== +// Phase 0 test #2 — AttributionMap::query_byte_range invariants +// =========================================================================== + +fn make_map(runs: Vec<(usize, usize, &str, i64)>) -> AttributionMap { + let mut b = AttributionDataBuilder::new(); + for (start, end, actor, time) in runs { + b.push_run(start, end, actor, time); + } + b.build().runs +} + +#[test] +fn query_byte_range_empty_runs_returns_none() { + let map = AttributionMap::new(); + assert!(map.query_byte_range(0, 10).is_none()); +} + +#[test] +fn query_byte_range_single_run_hit_within_bounds() { + let map = make_map(vec![(0, 10, "alice@x", 100)]); + let hit = map.query_byte_range(2, 5).expect("hit"); + assert_eq!(hit.actor.as_ref(), "alice@x"); + assert_eq!(hit.time, 100); +} + +#[test] +fn query_byte_range_non_overlapping_query_returns_none() { + let map = make_map(vec![(0, 5, "alice@x", 100)]); + assert!(map.query_byte_range(10, 20).is_none()); +} + +#[test] +fn query_byte_range_overlapping_two_actors_picks_most_recent() { + let map = make_map(vec![(0, 5, "alice@x", 100), (5, 10, "bob@x", 200)]); + let hit = map.query_byte_range(0, 10).expect("hit"); + assert_eq!(hit.actor.as_ref(), "bob@x"); + assert_eq!(hit.time, 200); +} + +#[test] +fn 
query_byte_range_at_run_boundary() { + // Query [0, 5) sits exactly inside the first run; the second run + // starts at 5 (exclusive boundary). The first author wins. + let map = make_map(vec![(0, 5, "alice@x", 100), (5, 10, "bob@x", 200)]); + let hit = map.query_byte_range(0, 5).expect("hit"); + assert_eq!(hit.actor.as_ref(), "alice@x"); + assert_eq!(hit.time, 100); +} + +#[test] +fn query_byte_range_inverted_or_empty_query_returns_none() { + let map = make_map(vec![(0, 10, "alice@x", 100)]); + assert!(map.query_byte_range(5, 5).is_none(), "empty range"); + assert!(map.query_byte_range(7, 3).is_none(), "inverted range"); +} diff --git a/crates/quarto-core/tests/attribution_viewer.rs b/crates/quarto-core/tests/attribution_viewer.rs new file mode 100644 index 000000000..974130296 --- /dev/null +++ b/crates/quarto-core/tests/attribution_viewer.rs @@ -0,0 +1,327 @@ +//! Phase A tests for `AttributionViewerTransform`. +//! +//! Pins the contract from +//! `claude-notes/plans/2026-05-14-attribution-auto-viewer.md`: when +//! attribution wrappers were produced (i.e. `AttributionRenderTransform` +//! populated `format_options.html.attribution_by_node`) and the YAML +//! opt-out was not set, the viewer transform appends an inline ` +``` + +`--attr-color` is consumed by the default viewer's base paint rule +(see [Default viewer](#viewer-overlay)) so authored prose appears in +the author's colour without any further wiring. `--attr-name` +is read by the hover badge from each wrapper's computed style. +Consuming themes can override either via a higher-specificity rule +in their own stylesheet without touching the rendered HTML. + +Contiguous prose authored by the same actor at the same time +coalesces into a single outer wrapper, so a paragraph written by one +person produces one wrapper rather than one per word. Structured +inlines (links, code, math, etc.) always get their own wrapper. 
+ +## Default viewer {#viewer-overlay} + +By default, `--attribution=git` (or YAML `attribution: git`) ships a +small inline ` : null; + const overlay = hovered ? ( + + ) : null; + + return { enabled, hostProps, stylesheet, overlay }; +} diff --git a/hub-client/src/components/render/framework/index.ts b/hub-client/src/components/render/framework/index.ts index 5b2df4746..096d1b9bb 100644 --- a/hub-client/src/components/render/framework/index.ts +++ b/hub-client/src/components/render/framework/index.ts @@ -1,5 +1,17 @@ export * from './types'; export { RegistryContext } from './RegistryContext'; +export { + AttributionLookupContext, + useNodeAttribution, + type NodeAttributionIdentity, +} from './AttributionLookupContext'; +export { + AttributionBadge, + AttributionWrap, + attributionStyles, + formatRelativeTime, + useAttributionHover, +} from './attribution'; export { Ast } from './Ast'; export { Node, renderChildren, renderNode, blockTypes } from './dispatch'; export * from './plainText'; diff --git a/hub-client/src/components/render/iframeMessageDispatch.test.ts b/hub-client/src/components/render/iframeMessageDispatch.test.ts new file mode 100644 index 000000000..b80f52833 --- /dev/null +++ b/hub-client/src/components/render/iframeMessageDispatch.test.ts @@ -0,0 +1,263 @@ +/** + * Regression tests for the q2-debug / q2-preview iframe message + * dispatcher. + * + * The bug this guards against: when `LOAD_CUSTOM_COMPONENTS` is in + * flight and two `UPDATE_AST` messages arrive while components are + * still loading, the previous setInterval(check, 50) polling pattern + * could resolve the two waiters out of arrival order — each waiter + * had its own setInterval phase, so depending on when + * `componentsLoading` flipped, the second-arrived waiter could fire + * first and the first-arrived waiter would overwrite it. 
In the + * attribution-pipeline branch this manifested as the no-attribution + * AST overwriting the with-attribution AST, so attribution colouring + * never appeared by default for large files with `render-components: + * - html.tsx`. + * + * The fix replaces the polling with a single shared promise that all + * UPDATE_AST waiters `await`. Microtask continuations on one promise + * resolve in FIFO insertion order, so message arrival order is + * preserved deterministically. + */ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { + makeIframeMessageDispatcher, + type IframeMessage, +} from './iframeMessageDispatch'; + +interface DeferredPromise { + promise: Promise; + resolve: (value: T) => void; +} + +function deferred(): DeferredPromise { + let resolve!: (value: T) => void; + const promise = new Promise((r) => { + resolve = r; + }); + return { promise, resolve }; +} + +describe('makeIframeMessageDispatcher', () => { + it('runs UPDATE_AST immediately when no LOAD_CUSTOM_COMPONENTS has fired', async () => { + const updateAst = vi.fn(); + const dispatch = makeIframeMessageDispatcher({ + loadCustomComponents: vi.fn(), + updateAst, + }); + + await dispatch({ type: 'UPDATE_AST', payload: { astJson: 'A' } }); + expect(updateAst).toHaveBeenCalledOnce(); + expect(updateAst).toHaveBeenCalledWith({ astJson: 'A' }); + }); + + it('runs UPDATE_AST immediately after LOAD_CUSTOM_COMPONENTS has settled', async () => { + const updateAst = vi.fn(); + const dispatch = makeIframeMessageDispatcher({ + loadCustomComponents: () => Promise.resolve(), + updateAst, + }); + + await dispatch({ + type: 'LOAD_CUSTOM_COMPONENTS', + componentsCode: { 'html.tsx': '' }, + }); + await dispatch({ type: 'UPDATE_AST', payload: { astJson: 'A' } }); + await dispatch({ type: 'UPDATE_AST', payload: { astJson: 'B' } }); + + expect(updateAst.mock.calls.map((c) => c[0])).toEqual([ + { astJson: 'A' }, + { astJson: 'B' }, + ]); + }); + + it('defers UPDATE_AST while 
LOAD_CUSTOM_COMPONENTS is in flight', async () => { + const updateAst = vi.fn(); + const load = deferred(); + const dispatch = makeIframeMessageDispatcher({ + loadCustomComponents: () => load.promise, + updateAst, + }); + + // Fire-and-forget the load (handler awaits internally). + const loadHandler = dispatch({ + type: 'LOAD_CUSTOM_COMPONENTS', + componentsCode: { 'html.tsx': '' }, + }); + // Let the LOAD handler enter its `await`. + await Promise.resolve(); + + const u1 = dispatch({ type: 'UPDATE_AST', payload: { astJson: 'A' } }); + const u2 = dispatch({ type: 'UPDATE_AST', payload: { astJson: 'B' } }); + // Let both UPDATE_AST handlers enter their `await`s. + await Promise.resolve(); + + // Neither should have called updateAst yet. + expect(updateAst).not.toHaveBeenCalled(); + + // Release the load. Both waiters wake; their `updateAst` calls + // should run in arrival order (A then B), not in the order the + // 50ms setInterval phases happened to fire. + load.resolve(); + await Promise.all([loadHandler, u1, u2]); + + expect(updateAst.mock.calls.map((c) => c[0])).toEqual([ + { astJson: 'A' }, + { astJson: 'B' }, + ]); + }); + + it('preserves FIFO across three pending UPDATE_AST messages', async () => { + // Three waiters exercise the chain harder than two — the + // continuation-list bug would have surfaced as any permutation, + // not strictly a swap. 
+ const updateAst = vi.fn(); + const load = deferred(); + const dispatch = makeIframeMessageDispatcher({ + loadCustomComponents: () => load.promise, + updateAst, + }); + + const loadHandler = dispatch({ + type: 'LOAD_CUSTOM_COMPONENTS', + componentsCode: { 'html.tsx': '' }, + }); + await Promise.resolve(); + + const handlers = [ + dispatch({ type: 'UPDATE_AST', payload: { astJson: 'A' } }), + dispatch({ type: 'UPDATE_AST', payload: { astJson: 'B' } }), + dispatch({ type: 'UPDATE_AST', payload: { astJson: 'C' } }), + ]; + await Promise.resolve(); + + load.resolve(); + await Promise.all([loadHandler, ...handlers]); + + expect(updateAst.mock.calls.map((c) => c[0])).toEqual([ + { astJson: 'A' }, + { astJson: 'B' }, + { astJson: 'C' }, + ]); + }); + + it('routes UPDATE_THEME to the applyTheme hook when provided', async () => { + const applyTheme = vi.fn(); + const dispatch = makeIframeMessageDispatcher({ + loadCustomComponents: vi.fn(), + updateAst: vi.fn(), + applyTheme, + }); + + await dispatch({ type: 'UPDATE_THEME', cssUrl: 'blob:abc' }); + await dispatch({ type: 'UPDATE_THEME', cssUrl: null }); + + expect(applyTheme.mock.calls.map((c) => c[0])).toEqual(['blob:abc', null]); + }); + + it('ignores UPDATE_THEME when applyTheme is not provided (q2-debug case)', async () => { + const updateAst = vi.fn(); + const dispatch = makeIframeMessageDispatcher({ + loadCustomComponents: vi.fn(), + updateAst, + }); + + // Should be a no-op, not a throw. + await expect( + dispatch({ type: 'UPDATE_THEME', cssUrl: 'blob:abc' } as IframeMessage), + ).resolves.toBeUndefined(); + expect(updateAst).not.toHaveBeenCalled(); + }); +}); + +/** + * Sanity-check that the *old* setInterval(check, 50) polling pattern + * can resolve UPDATE_AST waiters out of arrival order under + * deterministic fake-timer scheduling — i.e. that the bug actually + * existed and we're not chasing a non-issue. This test does NOT + * exercise the production code; it inlines the old algorithm in + * miniature. 
+ * + * The chosen schedule (load at t=0, A queued at t=0, B queued at + * t=60, componentsLoading flipped at t=105) reproduces the canonical + * failure: A's setInterval fires at 50, 100 (both see + * componentsLoading=true), then 150; B's setInterval fires at 110 + * (sees false, resolves) before A's 150 fire. The resulting order + * is B-then-A, not A-then-B. + * + * Anchoring the regression with a deterministic reproduction of the + * old behaviour means a future reader doesn't have to take the + * commit message's word that the race was real. + */ +describe('legacy setInterval-polling pattern (documents the bug)', () => { + beforeEach(() => { + vi.useFakeTimers(); + }); + afterEach(() => { + vi.useRealTimers(); + }); + + it('resolves UPDATE_AST waiters out of arrival order under unlucky timing', async () => { + const updateAstCalls: string[] = []; + let componentsLoading = false; + + const oldDispatch = async ( + msg: + | { type: 'LOAD_CUSTOM_COMPONENTS'; load: Promise } + | { type: 'UPDATE_AST'; payload: string }, + ) => { + if (msg.type === 'LOAD_CUSTOM_COMPONENTS') { + componentsLoading = true; + await msg.load; + componentsLoading = false; + } else { + if (componentsLoading) { + await new Promise((resolve) => { + const check = setInterval(() => { + if (!componentsLoading) { + clearInterval(check); + resolve(); + } + }, 50); + }); + } + updateAstCalls.push(msg.payload); + } + }; + + const load = deferred(); + // t=0 + const loadHandler = oldDispatch({ + type: 'LOAD_CUSTOM_COMPONENTS', + load: load.promise, + }); + await Promise.resolve(); // let LOAD handler enter await + + // A's setInterval is registered at t=0. + const uA = oldDispatch({ type: 'UPDATE_AST', payload: 'A' }); + await Promise.resolve(); + + // Advance fake time to t=60, then register B's setInterval. + await vi.advanceTimersByTimeAsync(60); + const uB = oldDispatch({ type: 'UPDATE_AST', payload: 'B' }); + await Promise.resolve(); + + // Advance to t=105 and flip componentsLoading to false. 
+ // (Phase: A's fires at 50, 100, 150; B's at 110, 160.) + await vi.advanceTimersByTimeAsync(45); + load.resolve(); + await Promise.resolve(); + await Promise.resolve(); + + // Advance to t=160 so both intervals have had a chance to fire. + await vi.advanceTimersByTimeAsync(55); + + await Promise.all([loadHandler, uA, uB]); + + // The bug: B resolves at 110, A at 150 → updateAst is called in + // the wrong order. If this assertion ever flips (e.g. a future + // browser/JS engine changes the setInterval scheduling + // semantics), the comment block above also needs updating. + expect(updateAstCalls).toEqual(['B', 'A']); + }); +}); diff --git a/hub-client/src/components/render/iframeMessageDispatch.ts b/hub-client/src/components/render/iframeMessageDispatch.ts new file mode 100644 index 000000000..49e0a5eae --- /dev/null +++ b/hub-client/src/components/render/iframeMessageDispatch.ts @@ -0,0 +1,125 @@ +/** + * Shared message dispatcher for the q2-debug and q2-preview iframe + * entries. + * + * The dispatcher coordinates three message kinds the parent + * (`Q2DebugIframe` / `Q2PreviewIframe`) posts: + * + * - `LOAD_CUSTOM_COMPONENTS` — async, transpiled user-TSX modules + * are imported and merged into the iframe-side custom registry. + * - `UPDATE_AST` — incoming AST JSON to render. Must not run before + * the in-flight load (if any) has finished, otherwise the user's + * TSX overrides would not yet be available to the dispatcher + * chain and the AST would render against the pre-override + * registry. + * - `UPDATE_THEME` — q2-preview only; routed through `applyTheme` + * when supplied. + * + * History — the previous in-line implementation gated `UPDATE_AST` + * on a `componentsLoading: boolean` flag using a 50-ms polling + * `setInterval`. 
Each waiter spawned its own interval, so two + * `UPDATE_AST` messages queued while the load was in flight could + * resolve in the wrong order — one waiter's interval phase happened + * to align such that the *second-arrived* message fired first and + * the *first-arrived* message overwrote it. In the attribution + * pipeline this manifested as the no-attribution AST clobbering the + * with-attribution AST, so the Authorship colouring never appeared + * on first render for large files with `render-components: - + * html.tsx`. See `iframeMessageDispatch.test.ts` for the + * deterministic reproduction. + * + * This module replaces the polling with a single shared promise + * (`pendingLoad`). Every `UPDATE_AST` handler `await`s the same + * promise, so when it resolves, the waiter continuations are + * scheduled as microtasks in FIFO insertion order — which is the + * order the messages arrived. Message ordering is then preserved + * deterministically without timer-phase sensitivity. + */ + +export interface LoadCustomComponentsMessage { + type: 'LOAD_CUSTOM_COMPONENTS'; + componentsCode: Record; +} + +export interface UpdateAstMessage { + type: 'UPDATE_AST'; + payload: unknown; +} + +export interface UpdateThemeMessage { + type: 'UPDATE_THEME'; + cssUrl: string | null; +} + +export type IframeMessage = + | LoadCustomComponentsMessage + | UpdateAstMessage + | UpdateThemeMessage; + +export interface IframeMessageHandlers { + /** + * Imports user-TSX modules and merges them into the iframe's + * custom registry. Must be idempotent — the parent re-sends + * `LOAD_CUSTOM_COMPONENTS` whenever its `customComponentsCode` + * reference changes. + */ + loadCustomComponents: ( + componentsCode: Record, + ) => Promise; + /** Applies a new AST payload to the iframe's React root. */ + updateAst: (payload: unknown) => void; + /** + * q2-preview only — imperatively applies (or clears) the theme + * stylesheet `` in `document.head`. 
Omitted by q2-debug, + * which has no theme channel. + */ + applyTheme?: (cssUrl: string | null) => void; +} + +export type IframeMessageDispatcher = ( + message: IframeMessage, +) => Promise; + +/** + * Construct an iframe message dispatcher closed over the supplied + * handlers. The returned function is a single message-listener + * callback suitable for `window.addEventListener('message', …)` + * (after pulling `event.data` out of the MessageEvent). + */ +export function makeIframeMessageDispatcher( + handlers: IframeMessageHandlers, +): IframeMessageDispatcher { + // Holds the promise for the currently in-flight + // loadCustomComponents call (or `null` when no load is pending). + // Every UPDATE_AST handler `await`s this reference; FIFO microtask + // ordering on a single shared promise is what guarantees message + // arrival order is preserved. + let pendingLoad: Promise | null = null; + + return async function dispatch(message) { + if (message.type === 'LOAD_CUSTOM_COMPONENTS') { + const load = handlers.loadCustomComponents(message.componentsCode); + pendingLoad = load; + try { + await load; + } finally { + // Only clear when no newer load has replaced ours. + // Without this guard, a second LOAD that started while + // the first was still in flight would lose its + // pendingLoad pointer when the first settled, and any + // UPDATE_AST queued for the second load would run + // before it finished. 
+ if (pendingLoad === load) { + pendingLoad = null; + } + } + } else if (message.type === 'UPDATE_AST') { + if (pendingLoad) { + await pendingLoad; + } + handlers.updateAst(message.payload); + } else if (message.type === 'UPDATE_THEME') { + handlers.applyTheme?.(message.cssUrl); + } + }; +} diff --git a/hub-client/src/components/render/q2-debug/attribution.integration.test.tsx b/hub-client/src/components/render/q2-debug/attribution.integration.test.tsx new file mode 100644 index 000000000..bafd1d834 --- /dev/null +++ b/hub-client/src/components/render/q2-debug/attribution.integration.test.tsx @@ -0,0 +1,143 @@ +/** + * @vitest-environment jsdom + */ +import { describe, it, expect, afterEach } from 'vitest'; +import { render, cleanup, fireEvent } from '@testing-library/react'; +import { Ast } from '../framework'; +import { q2DebugRegistry } from './registry'; + +afterEach(() => { + cleanup(); +}); + +const noopSetAst = () => {}; + +/** + * Phase 5c — when `astContext.attribution` / `astContext.attributionActors` + * are absent, the q2-debug renderer wraps no nodes and paints no colour. + * When they're present, each annotated node gets a `q2-attr-wrap` + * wrapper carrying `color: ` and `data-sid=`. + * Hovering surfaces a single floating badge with the author's name + * and a relative-time string. + * + * Mounted via the framework `Ast` with the `q2DebugRegistry` so we + * exercise the same wiring the iframe uses at runtime. + */ +describe('q2-debug attribution wiring', () => { + it('off path: no q2-attr-wrap and no inline colour', () => { + const ast = { + 'pandoc-api-version': [1, 23, 1], + meta: {}, + blocks: [{ t: 'Para', s: 1, c: [{ t: 'Str', s: 2, c: 'hello' }] }], + }; + const { container } = render( + , + ); + expect(container.querySelector('.q2-attr-wrap')).toBeNull(); + expect(container.querySelector('.q2-attr-badge')).toBeNull(); + // Existing Para label still renders. 
+ expect(container.textContent).toMatch(/Para/); + expect(container.textContent).toMatch(/hello/); + }); + + it('on path: each annotated node gets a colour-only wrapper', () => { + const ast = { + 'pandoc-api-version': [1, 23, 1], + meta: {}, + blocks: [{ t: 'Para', s: 1, c: [{ t: 'Str', s: 2, c: 'hello' }] }], + astContext: { + attribution: [ + { s: 1, actor: 'alice', time: Date.now() }, + { s: 2, actor: 'alice', time: Date.now() }, + ], + attributionActors: { + alice: { name: 'Alice', color: '#ff0000' }, + }, + }, + }; + const { container } = render( + , + ); + + const wraps = container.querySelectorAll('.q2-attr-wrap'); + // One block-level Para wrapper, one inline-level Str wrapper. + expect(wraps.length).toBe(2); + + for (const wrap of Array.from(wraps)) { + const el = wrap as HTMLElement; + // Colour is applied as an inline style — JSDOM normalises rgb(). + expect(el.style.color).toBe('rgb(255, 0, 0)'); + expect(el.getAttribute('data-sid')).toMatch(/^[12]$/); + } + + // No badge yet — hover hasn't fired. + expect(container.querySelector('.q2-attr-badge')).toBeNull(); + }); + + it('hover surfaces a single badge with name + relative time', () => { + const ast = { + 'pandoc-api-version': [1, 23, 1], + meta: {}, + blocks: [{ t: 'Para', s: 1, c: [{ t: 'Str', s: 2, c: 'hello' }] }], + astContext: { + attribution: [ + // 90 seconds ago → "1m ago". 
+ { s: 1, actor: 'alice', time: Date.now() - 90_000 }, + { s: 2, actor: 'alice', time: Date.now() - 90_000 }, + ], + attributionActors: { + alice: { name: 'Alice', color: '#ff0000' }, + }, + }, + }; + const { container } = render( + , + ); + + const wrap = container.querySelector('.q2-attr-wrap[data-sid="2"]') as HTMLElement; + expect(wrap).not.toBeNull(); + fireEvent.mouseOver(wrap); + + const badge = container.querySelector('.q2-attr-badge') as HTMLElement | null; + expect(badge).not.toBeNull(); + expect(badge!.textContent).toMatch(/Alice/); + expect(badge!.textContent).toMatch(/m ago/); + }); + + it('on path: actor with no entry in attributionActors falls through', () => { + const ast = { + 'pandoc-api-version': [1, 23, 1], + meta: {}, + blocks: [{ t: 'Para', s: 1, c: [{ t: 'Str', s: 2, c: 'world' }] }], + astContext: { + attribution: [{ s: 1, actor: 'ghost', time: Date.now() }], + attributionActors: {}, // no entry for "ghost" + }, + }; + const { container } = render( + , + ); + expect(container.querySelector('.q2-attr-wrap')).toBeNull(); + expect(container.querySelector('.q2-attr-badge')).toBeNull(); + }); +}); diff --git a/hub-client/src/components/render/q2-debug/components.tsx b/hub-client/src/components/render/q2-debug/components.tsx index 8e54de137..370de0faf 100644 --- a/hub-client/src/components/render/q2-debug/components.tsx +++ b/hub-client/src/components/render/q2-debug/components.tsx @@ -1,5 +1,6 @@ import React from 'react'; import { Node, renderChildren } from '../framework/dispatch'; +import { useAttributionHover } from '../framework'; import type { InlineNode, NodeArgs, @@ -199,16 +200,33 @@ export const InlineComponents: Record React.ReactNode> = Quoted, }; +/** + * q2-debug document root. Delegates the badge stylesheet / hover + * handler / overlay wiring to `useAttributionHover`. Off-path the + * hook returns inert `hostProps` / `null` overlay+stylesheet, so the + * rendered DOM is byte-identical to pre-attribution. 
+ */ export const AstRenderer = ({ ast, onNavigateToDocument, setAst }: { ast: PandocAST; onNavigateToDocument?: (path: string, anchor: string | null) => void; setAst: (newAst: PandocAST) => void; -}) => ( -
- {renderChildren({ - node: ast as any, - setLocalAst: setAst as any, - onNavigateToDocument - })} -
-); +}) => { + const attr = useAttributionHover(); + return ( + <> + {attr.stylesheet} +
+ {renderChildren({ + node: ast as any, + setLocalAst: setAst as any, + onNavigateToDocument, + })} + {attr.overlay} +
+ + ); +}; diff --git a/hub-client/src/components/render/q2-debug/dispatchers.tsx b/hub-client/src/components/render/q2-debug/dispatchers.tsx index 30a141624..0efd0d346 100644 --- a/hub-client/src/components/render/q2-debug/dispatchers.tsx +++ b/hub-client/src/components/render/q2-debug/dispatchers.tsx @@ -1,5 +1,6 @@ import { useContext } from 'react'; import { RegistryContext } from '../framework/RegistryContext'; +import { AttributionWrap } from '../framework'; import type { BlockNode, InlineNode, NodeArgs } from '../framework/types'; import { blockStyle, inlineStyle } from './styles'; @@ -7,21 +8,34 @@ import { blockStyle, inlineStyle } from './styles'; * q2-debug Block dispatcher: looks up the format registry by Pandoc tag * and renders the corresponding leaf component, falling back to a bordered * "Not registered" message when no component is registered for the tag. + * + * Phase 5c — when attribution is on for this node, `AttributionWrap` + * emits a `.q2-attr-wrap` div carrying `data-sid` + inline `color` + * so the descendant text inherits the author's identity colour. + * Off path the wrap is a pass-through, leaving the dispatcher output + * byte-identical to pre-attribution. */ export const Block = (args: NodeArgs) => { const { registry } = useContext(RegistryContext); - const Component = registry[args.node.t]; - return Component ? :
Not registered: {args.node.t}
; + const inner = Component + ? + :
Not registered: {args.node.t}
; + + return {inner}; } /** * q2-debug Inline dispatcher: same as Block but for inline-level nodes, - * with the inline-flavored "Not registered" miss path. + * with the inline-flavored "Not registered" miss path and a `` + * attribution wrap. */ export const Inline = (args: NodeArgs) => { const { registry } = useContext(RegistryContext); - const Component = registry[args.node.t]; - return Component ? : Not registered: {args.node.t}; + const inner = Component + ? + : Not registered: {args.node.t}; + + return {inner}; } diff --git a/hub-client/src/components/render/q2-debug/entry.tsx b/hub-client/src/components/render/q2-debug/entry.tsx index f94fc64a6..97f146fa7 100644 --- a/hub-client/src/components/render/q2-debug/entry.tsx +++ b/hub-client/src/components/render/q2-debug/entry.tsx @@ -28,6 +28,10 @@ import { q2DebugRegistry, } from '.'; import { buildCustomRegistry, type ComponentExports } from '../../../utils/customRegistry'; +import { + makeIframeMessageDispatcher, + type IframeMessage, +} from '../iframeMessageDispatch'; // Set the renderer-surface global at module top so importing this // module is sufficient to populate `window.__REACT_AST_DEBUG_RENDERER__` @@ -48,35 +52,25 @@ import { buildCustomRegistry, type ComponentExports } from '../../../utils/custo let root: ReturnType | null = null; let customRegistry: Record> = {}; -let componentsLoading = false; interface UpdateAstPayload { astJson: string; currentFilePath: string; } -// Handle messages from parent window -window.addEventListener('message', async (event) => { - // In production, verify event.origin for security +// Shared dispatcher gates UPDATE_AST on the in-flight +// LOAD_CUSTOM_COMPONENTS promise so two UPDATE_ASTs queued during +// component load run in arrival order. See +// `../iframeMessageDispatch.ts` for the rationale (the previous +// setInterval-polling pattern was phase-racy). 
+const dispatch = makeIframeMessageDispatcher({ + loadCustomComponents, + updateAst: (payload) => updateAst(payload as UpdateAstPayload), +}); - if (event.data.type === 'LOAD_CUSTOM_COMPONENTS') { - componentsLoading = true; - await loadCustomComponents(event.data.componentsCode); - componentsLoading = false; - } else if (event.data.type === 'UPDATE_AST') { - // Wait for components to finish loading before rendering - if (componentsLoading) { - await new Promise(resolve => { - const check = setInterval(() => { - if (!componentsLoading) { - clearInterval(check); - resolve(undefined); - } - }, 50); - }); - } - updateAst(event.data.payload); - } +window.addEventListener('message', (event) => { + // In production, verify event.origin for security + dispatch(event.data as IframeMessage); }); /** diff --git a/hub-client/src/components/render/q2-preview/PreviewDocument.tsx b/hub-client/src/components/render/q2-preview/PreviewDocument.tsx index c1de1d183..482b4bc0f 100644 --- a/hub-client/src/components/render/q2-preview/PreviewDocument.tsx +++ b/hub-client/src/components/render/q2-preview/PreviewDocument.tsx @@ -4,6 +4,7 @@ import { extractMetaString, extractMetaBool, RegistryContext, + useAttributionHover, } from '../framework'; import type { BlockNode, PandocAST } from '../framework'; import * as Custom from './custom'; @@ -94,6 +95,14 @@ export const PreviewDocument = ({ }; }, [meta]); + // Attribution wiring (Phase 3 of + // `2026-05-13-q2-preview-attribution.md`): delegated to + // `useAttributionHover`, which returns inert wiring when + // `AttributionLookupContext` is unpopulated — off-path DOM stays + // byte-identical to pre-attribution. Same hook is consumed by + // q2-debug's `AstRenderer`. 
+ const attr = useAttributionHover(); + const children = renderChildren({ node: ast as any, setLocalAst: setAst as any, @@ -114,27 +123,45 @@ export const PreviewDocument = ({ Array.isArray((b as { c?: unknown[] }).c) && ((b as { c: unknown[] }).c[0] === 1), ); - return ( + const minimalInner = ( <> {title && !hasLevel1Header ?

{title}

: null} {children} ); + // When attribution is on we need a host element to carry the + // mouseover delegation. Off-path stay on the Fragment so the + // minimal-mode DOM is byte-identical to today's. + if (attr.enabled) { + return ( + <> + {attr.stylesheet} +
{minimalInner}
+ {attr.overlay} + + ); + } + return minimalInner; } return ( -
-
- - {children} -
-
+ <> + {attr.stylesheet} +
+
+ + {children} +
+
+ {attr.overlay} + ); }; diff --git a/hub-client/src/components/render/q2-preview/attribution.integration.test.tsx b/hub-client/src/components/render/q2-preview/attribution.integration.test.tsx new file mode 100644 index 000000000..a7e278a88 --- /dev/null +++ b/hub-client/src/components/render/q2-preview/attribution.integration.test.tsx @@ -0,0 +1,144 @@ +/** + * @vitest-environment jsdom + */ +import { describe, it, expect, afterEach } from 'vitest'; +import { render, cleanup, fireEvent } from '@testing-library/react'; +import { Ast } from '../framework'; +import { previewRegistry } from './registry'; + +afterEach(() => { + cleanup(); +}); + +const noopSetAst = () => {}; + +/** + * Phase 3 of `2026-05-13-q2-preview-attribution.md` — q2-preview + * sibling of `q2-debug/attribution.integration.test.tsx`. Same four + * scenarios (off path; on path wrapping; hover surfaces badge; + * missing actor identity falls through) against the `previewRegistry`. + * + * The interesting structural difference from q2-debug is that + * `previewRegistry.Ast = PreviewDocument` — the document-root wrapper + * that injects `attributionStyles` and attaches mouseover/mouseout + * delegation on the q2-preview side. q2-debug's `AstRenderer` plays + * the same role; the two formats now share the `framework/attribution.tsx` + * widget but mount it from their respective root components. + */ +describe('q2-preview attribution wiring', () => { + it('off path: no q2-attr-wrap and no inline colour', () => { + const ast = { + 'pandoc-api-version': [1, 23, 1], + meta: {}, + blocks: [{ t: 'Para', s: 1, c: [{ t: 'Str', s: 2, c: 'hello' }] }], + }; + const { container } = render( + , + ); + expect(container.querySelector('.q2-attr-wrap')).toBeNull(); + expect(container.querySelector('.q2-attr-badge')).toBeNull(); + // Existing prose still renders through the preview leaves. 
+ expect(container.textContent).toMatch(/hello/); + }); + + it('on path: each annotated node gets a colour-only wrapper', () => { + const ast = { + 'pandoc-api-version': [1, 23, 1], + meta: {}, + blocks: [{ t: 'Para', s: 1, c: [{ t: 'Str', s: 2, c: 'hello' }] }], + astContext: { + attribution: [ + { s: 1, actor: 'alice', time: Date.now() }, + { s: 2, actor: 'alice', time: Date.now() }, + ], + attributionActors: { + alice: { name: 'Alice', color: '#ff0000' }, + }, + }, + }; + const { container } = render( + , + ); + + const wraps = container.querySelectorAll('.q2-attr-wrap'); + // One block-level Para wrapper, one inline-level Str wrapper. + expect(wraps.length).toBe(2); + + for (const wrap of Array.from(wraps)) { + const el = wrap as HTMLElement; + // Colour is applied as an inline style — JSDOM normalises rgb(). + expect(el.style.color).toBe('rgb(255, 0, 0)'); + expect(el.getAttribute('data-sid')).toMatch(/^[12]$/); + } + + // No badge yet — hover hasn't fired. + expect(container.querySelector('.q2-attr-badge')).toBeNull(); + }); + + it('hover surfaces a single badge with name + relative time', () => { + const ast = { + 'pandoc-api-version': [1, 23, 1], + meta: {}, + blocks: [{ t: 'Para', s: 1, c: [{ t: 'Str', s: 2, c: 'hello' }] }], + astContext: { + attribution: [ + // 90 seconds ago → "1m ago". 
+ { s: 1, actor: 'alice', time: Date.now() - 90_000 }, + { s: 2, actor: 'alice', time: Date.now() - 90_000 }, + ], + attributionActors: { + alice: { name: 'Alice', color: '#ff0000' }, + }, + }, + }; + const { container } = render( + , + ); + + const wrap = container.querySelector('.q2-attr-wrap[data-sid="2"]') as HTMLElement; + expect(wrap).not.toBeNull(); + fireEvent.mouseOver(wrap); + + const badge = container.querySelector('.q2-attr-badge') as HTMLElement | null; + expect(badge).not.toBeNull(); + expect(badge!.textContent).toMatch(/Alice/); + expect(badge!.textContent).toMatch(/m ago/); + }); + + it('on path: actor with no entry in attributionActors falls through', () => { + const ast = { + 'pandoc-api-version': [1, 23, 1], + meta: {}, + blocks: [{ t: 'Para', s: 1, c: [{ t: 'Str', s: 2, c: 'world' }] }], + astContext: { + attribution: [{ s: 1, actor: 'ghost', time: Date.now() }], + attributionActors: {}, // no entry for "ghost" + }, + }; + const { container } = render( + , + ); + expect(container.querySelector('.q2-attr-wrap')).toBeNull(); + expect(container.querySelector('.q2-attr-badge')).toBeNull(); + }); +}); diff --git a/hub-client/src/components/render/q2-preview/dispatchers.tsx b/hub-client/src/components/render/q2-preview/dispatchers.tsx index e1ac979bf..5d7d7db9b 100644 --- a/hub-client/src/components/render/q2-preview/dispatchers.tsx +++ b/hub-client/src/components/render/q2-preview/dispatchers.tsx @@ -1,6 +1,6 @@ import { useContext } from 'react'; import { RegistryContext } from '../framework/RegistryContext'; -import { renderChildren } from '../framework'; +import { AttributionWrap, renderChildren } from '../framework'; import type { BlockNode, CustomBlockNode, @@ -34,32 +34,45 @@ const PLACEHOLDER_CLASS = 'q2-preview-placeholder'; * smoke-fixture must-not-match selector (`div.q2-preview-placeholder`) * actually fires when the placeholder fires. The inline `style` is * preserved alongside (no theme-CSS dependency). 
+ * + * Attribution wrap (Phase 3 of `2026-05-13-q2-preview-attribution.md`): + * `AttributionWrap` paints the dispatched output with a `.q2-attr-wrap` + * div carrying `data-sid` and inline `color` whenever this node has + * resolved attribution; off-path it is a pass-through, so the + * dispatcher output is byte-identical to pre-attribution. */ export const Block = (args: NodeArgs) => { const { registry } = useContext(RegistryContext); const Component = registry[args.node.t]; - if (Component) return ; - return ( + const inner = Component ? ( + + ) : (
{args.node.t} (not yet implemented){renderChildren(args)}
); + + return {inner}; }; /** * q2-preview's Inline dispatcher. Same pattern as `Block` for * inline-level nodes — placeholder + recursion on miss so nested - * inlines surface their own placeholders. + * inlines surface their own placeholders. Also wraps in + * `.q2-attr-wrap` when attribution is resolved. */ export const Inline = (args: NodeArgs) => { const { registry } = useContext(RegistryContext); const Component = registry[args.node.t]; - if (Component) return ; - return ( + const inner = Component ? ( + + ) : ( {args.node.t} (not yet implemented){renderChildren(args)} ); + + return {inner}; }; /** @@ -73,12 +86,19 @@ export const Inline = (args: NodeArgs) => { * same key namespace; the two sets are disjoint by project policy * (locked at build time by `registry.test.ts`'s namespace-disjoint * assertion). + * + * Attribution wrap: same as `Block`. CustomNodes (Callout, Theorem, + * FloatRefTarget, ...) cover larger source ranges than primitive + * blocks, so attributing them paints the whole containing block in + * the author's colour. */ export const CustomBlock = (args: NodeArgs) => { const { registry } = useContext(RegistryContext); const Component = registry[args.node.type_name] ?? registry['__fallback__']; - return ; + const inner = ; + + return {inner}; }; /** @@ -89,5 +109,7 @@ export const CustomInline = (args: NodeArgs) => { const { registry } = useContext(RegistryContext); const Component = registry[args.node.type_name] ?? 
registry['__fallback__']; - return ; + const inner = ; + + return {inner}; }; diff --git a/hub-client/src/components/render/q2-preview/entry.tsx b/hub-client/src/components/render/q2-preview/entry.tsx index 8b7f30533..0bea3807a 100644 --- a/hub-client/src/components/render/q2-preview/entry.tsx +++ b/hub-client/src/components/render/q2-preview/entry.tsx @@ -53,6 +53,10 @@ import { type ComponentExports, } from '../../../utils/customRegistry'; import { installLinkHandlers } from '../../../utils/iframeLinkHandlers'; +import { + makeIframeMessageDispatcher, + type IframeMessage, +} from '../iframeMessageDispatch'; // Set the renderer-surface global at module top. Importing this module // is sufficient to populate `window.__Q2_PREVIEW_RENDERER__`. The @@ -89,7 +93,6 @@ import { installLinkHandlers } from '../../../utils/iframeLinkHandlers'; let root: ReturnType | null = null; let customRegistry: Record> = {}; -let componentsLoading = false; interface UpdateAstPayload { astJson: string; @@ -105,29 +108,22 @@ interface UpdateAstPayload { assetManifest?: Record; } +// Shared dispatcher gates UPDATE_AST on the in-flight +// LOAD_CUSTOM_COMPONENTS promise so two UPDATE_ASTs queued during +// component load run in arrival order. See +// `../iframeMessageDispatch.ts` for the rationale (the previous +// setInterval-polling pattern was phase-racy). +const dispatch = makeIframeMessageDispatcher({ + loadCustomComponents, + updateAst: (payload) => updateAst(payload as UpdateAstPayload), + applyTheme, +}); + // Module-top message handler. Registered before `IFRAME_READY` is // posted so the parent's `UPDATE_THEME` (which can fire immediately // after `IFRAME_READY` from a sibling `useEffect`) is never dropped. 
-window.addEventListener('message', async (event) => { - if (event.data.type === 'LOAD_CUSTOM_COMPONENTS') { - componentsLoading = true; - await loadCustomComponents(event.data.componentsCode); - componentsLoading = false; - } else if (event.data.type === 'UPDATE_AST') { - if (componentsLoading) { - await new Promise((resolve) => { - const check = setInterval(() => { - if (!componentsLoading) { - clearInterval(check); - resolve(undefined); - } - }, 50); - }); - } - updateAst(event.data.payload); - } else if (event.data.type === 'UPDATE_THEME') { - applyTheme(event.data.cssUrl); - } +window.addEventListener('message', (event) => { + dispatch(event.data as IframeMessage); }); /** diff --git a/hub-client/src/components/tabs/SettingsTab.tsx b/hub-client/src/components/tabs/SettingsTab.tsx index 56f89f97f..2be6149dd 100644 --- a/hub-client/src/components/tabs/SettingsTab.tsx +++ b/hub-client/src/components/tabs/SettingsTab.tsx @@ -22,6 +22,7 @@ export default function SettingsTab({ onScrollSyncChange, }: SettingsTabProps) { const [errorOverlayCollapsed, setErrorOverlayCollapsed] = usePreference('errorOverlayCollapsed'); + const [attributionEnabled, setAttributionEnabled] = usePreference('attributionEnabled'); const [isCapturing, setIsCapturing] = useState(false); const handleScreenshot = async () => { @@ -93,6 +94,18 @@ export default function SettingsTab({ Show errors as a small indicator instead of expanded panel
+
+ ); +} + +function Reader() { + const [value] = usePreference('attributionEnabled'); + return reader: {String(value)}; +} + +describe('usePreference cross-instance reactivity', () => { + beforeEach(() => { + localStorage.clear(); + }); + + it('a setValue in one component is observed by a sibling instance', () => { + render( + <> + + + , + ); + + // Both start at default (false for attributionEnabled). + expect(screen.getByTestId('writer').textContent).toBe('writer: false'); + expect(screen.getByTestId('reader').textContent).toBe('reader: false'); + + // Toggle in the writer — the reader observes the change without + // remounting. + fireEvent.click(screen.getByTestId('writer')); + + expect(screen.getByTestId('writer').textContent).toBe('writer: true'); + expect(screen.getByTestId('reader').textContent).toBe('reader: true'); + }); +}); diff --git a/hub-client/src/hooks/usePreference.ts b/hub-client/src/hooks/usePreference.ts index 4fe3ab1cb..37339b734 100644 --- a/hub-client/src/hooks/usePreference.ts +++ b/hub-client/src/hooks/usePreference.ts @@ -1,12 +1,25 @@ -import { useState, useCallback } from 'react'; -import type { PreferenceKey, UserPreferences } from '../services/preferences'; -import { getPreference, setPreference } from '../services/preferences'; +import { useState, useCallback, useEffect } from 'react'; +import type { + PreferenceChangeDetail, + PreferenceKey, + UserPreferences, +} from '../services/preferences'; +import { + PREFERENCE_CHANGE_EVENT, + getPreference, + setPreference, +} from '../services/preferences'; /** * React hook for reading and updating a user preference. * Returns a tuple like useState: [value, setValue] * * The value is initialized from localStorage and persisted on update. + * Every `usePreference(key)` instance in the window stays in sync — + * `setPreference` dispatches a same-window `PREFERENCE_CHANGE_EVENT` + * and each instance re-reads when it matches `key`. This lets a + * toggle in one component (e.g. 
SettingsTab) take effect immediately + * in a sibling component (e.g. ReactPreview) without a page refresh. * * @param key - The preference key to read/write * @returns [currentValue, updateFunction] @@ -24,5 +37,16 @@ export function usePreference( [key] ); + useEffect(() => { + const onChange = (event: Event) => { + const detail = (event as CustomEvent).detail; + if (detail?.key === key) { + setValue(getPreference(key)); + } + }; + window.addEventListener(PREFERENCE_CHANGE_EVENT, onChange); + return () => window.removeEventListener(PREFERENCE_CHANGE_EVENT, onChange); + }, [key]); + return [value, updateValue]; } diff --git a/hub-client/src/hooks/useReplayMode.ts b/hub-client/src/hooks/useReplayMode.ts index ae2fea331..6140b2194 100644 --- a/hub-client/src/hooks/useReplayMode.ts +++ b/hub-client/src/hooks/useReplayMode.ts @@ -28,12 +28,6 @@ export interface ReplayState { chunkActors: ChunkActorShare[][]; // per-chunk actor fractions for the waveform } -/** Deterministic color from an actor hash string. */ -export function actorColor(actor: string): string { - const hue = parseInt(actor.slice(0, 6), 16) % 360; - return `hsl(${hue}, 60%, 55%)`; -} - export interface ReplayControls { enter: () => void; exit: () => void; diff --git a/hub-client/src/services/attribution-runs.test.ts b/hub-client/src/services/attribution-runs.test.ts new file mode 100644 index 000000000..c295ed2de --- /dev/null +++ b/hub-client/src/services/attribution-runs.test.ts @@ -0,0 +1,77 @@ +/** + * Tests for the producer half of the attribution pipeline. + * + * Focused on the bits that are new for the implementation branch + * (char→byte translation, payload shape). The run-list invariants + * proper are covered exhaustively on `feat/node-attribution` and pinned + * cross-implementation by `crates/quarto-core/tests/attribution_types.rs` + * (`query_byte_range` invariants — Phase 0 test #2). 
+ * + * @vitest-environment jsdom + */ + +import { describe, it, expect } from 'vitest'; + +import { + buildCharToByteMap, + runsCharToByteOffsets, + type AttributionRun, +} from './attribution-runs'; + +describe('buildCharToByteMap', () => { + it('is the identity for ASCII text', () => { + const map = buildCharToByteMap('hello world'); + expect(map.length).toBe(12); // 11 chars + terminator + for (let i = 0; i <= 11; i++) expect(map[i]).toBe(i); + }); + + it('counts 2-byte UTF-8 sequences correctly', () => { + // "é" is U+00E9, 2 bytes in UTF-8 (0xc3 0xa9). + const map = buildCharToByteMap('aéb'); + // 'a' at char 0 → byte 0; 'é' at char 1 → byte 1; 'b' at char 2 → byte 3. + expect(map).toEqual([0, 1, 3, 4]); + }); + + it('counts 3-byte UTF-8 sequences correctly (CJK)', () => { + // "中" is U+4E2D, 3 bytes in UTF-8 (0xe4 0xb8 0xad). + const map = buildCharToByteMap('a中b'); + expect(map).toEqual([0, 1, 4, 5]); + }); + + it('handles surrogate-pair (4-byte) codepoints', () => { + // "𝕏" is U+1D54F, 4 bytes in UTF-8 — JS represents it as 2 UTF-16 + // code units (surrogate pair). In 'a𝕏b', chars 1 and 2 are the two halves. + const map = buildCharToByteMap('a𝕏b'); + expect(map.length).toBe(5); + expect(map[0]).toBe(0); // 'a' + expect(map[1]).toBe(1); // high surrogate of '𝕏' + expect(map[2]).toBe(5); // low surrogate — past the 4-byte sequence + expect(map[3]).toBe(5); // 'b' + expect(map[4]).toBe(6); // EOS + }); +}); + +describe('runsCharToByteOffsets', () => { + it('translates char-indexed runs through a non-ASCII map', () => { + // Document text: "a中b" (3 chars, 5 bytes). One run spans the + // whole text in char offsets [0..3). 
+ const sourceText = 'a中b'; + const map = buildCharToByteMap(sourceText); + const runs: AttributionRun[] = [ + { start: 0, end: 3, actor: 'alice', time: 1 }, + ]; + const out = runsCharToByteOffsets(runs, map); + expect(out).toEqual([{ start: 0, end: 5, actor: 'alice', time: 1 }]); + }); + + it('is the identity for ASCII inputs', () => { + const sourceText = 'hello world'; + const map = buildCharToByteMap(sourceText); + const runs: AttributionRun[] = [ + { start: 0, end: 5, actor: 'alice', time: 1 }, + { start: 6, end: 11, actor: 'bob', time: 2 }, + ]; + const out = runsCharToByteOffsets(runs, map); + expect(out).toEqual(runs); + }); +}); diff --git a/hub-client/src/services/attribution-runs.ts b/hub-client/src/services/attribution-runs.ts new file mode 100644 index 000000000..6e96783fd --- /dev/null +++ b/hub-client/src/services/attribution-runs.ts @@ -0,0 +1,409 @@ +/** + * Run-length-encoded attribution **producer** for Automerge documents. + * + * Replays the Automerge history of a text field and emits a sorted, + * non-overlapping, contiguous run list `[{start, end, actor, time}, ...]` + * keyed in **JS character offsets** (Automerge splice positions = UTF-16 + * code units). Consumers must convert to UTF-8 byte offsets before + * shipping to the Rust pipeline; see `useAttribution` for that step. + * + * Only the producer side lives here. Per the Phase 5 plan, the + * consumer-side query / reconstruction / cache code from the prototype's + * `attribution.ts` / `attribution-runs.ts` is deliberately *not* ported + * — the Rust `AttributionMap::query_byte_range` replaces it. + * + * Algorithm reference (and known-good baseline): the prototype branch + * `feat/node-attribution` carries this file along with the consumer-side + * surface and the `attribution-runs.test.ts` invariant suite. 
+ */ + +import { diff } from '@automerge/automerge'; +import type { Heads } from '@automerge/automerge'; +import { decodeHeads } from '@automerge/automerge-repo'; +import type { DocHandle } from '@automerge/automerge-repo'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface CharAttribution { + actor: string; + time: number; +} + +export interface AttributionRun { + /** inclusive char offset */ + start: number; + /** exclusive char offset */ + end: number; + actor: string; + time: number; +} + +export interface RunListAttribution { + runs: AttributionRun[]; + processedHeads: unknown[]; + processedHistoryIndex: number; +} + +interface SplicePatch { + action: 'splice'; + path: [string, number]; + value: string; +} + +interface DelPatch { + action: 'del'; + path: [string, number]; + length?: number; +} + +interface PutPatch { + action: 'put'; + path: [string]; + value: string; +} + +export type TextPatch = SplicePatch | DelPatch | PutPatch; + +export interface ViewableHandle { + history(): unknown[] | undefined; + metadata(change?: string): { time?: number; actor?: string } | undefined; + doc(): unknown; +} + +export class HistoryCompactedError extends Error { + constructor() { + super('History has been compacted — full rebuild required'); + this.name = 'HistoryCompactedError'; + } +} + +export function isTextPatch(patch: unknown, textFieldName: string): patch is TextPatch { + const p = patch as { action?: string; path?: unknown[] }; + if (!p || !Array.isArray(p.path) || p.path[0] !== textFieldName) return false; + return p.action === 'splice' || p.action === 'del' || p.action === 'put'; +} + +export function extractChangeHash(heads: unknown): string | null { + const h = Array.isArray(heads) ? heads[0] : heads; + return typeof h === 'string' ? h : null; +} + +/** + * History entries processed between idle-callback yields. 
Larger + * values reduce the number of rIC round trips (faster + * time-to-attribution) but make each slice's CPU block bigger (more + * frame jank risk). 500 gives ~2.5 ms of CPU per slice at the + * prototype's bench-measured ~5 µs/entry. + */ +export const CHUNK_SIZE = 500; + +export function waitForIdle(timeout = 100): Promise { + return new Promise(resolve => { + if (typeof requestIdleCallback === 'function') { + requestIdleCallback(() => resolve(), { timeout }); + } else { + setTimeout(resolve, 0); + } + }); +} + +// --------------------------------------------------------------------------- +// Internal: patch application on run list +// --------------------------------------------------------------------------- + +/** Binary search for first run whose `end > p`. Returns runs.length if none. */ +function findFirstRunEndingAfter(runs: AttributionRun[], p: number): number { + let lo = 0; + let hi = runs.length; + while (lo < hi) { + const mid = (lo + hi) >> 1; + if (runs[mid].end <= p) lo = mid + 1; + else hi = mid; + } + return lo; +} + +function runsInsert( + runs: AttributionRun[], + p: number, + k: number, + attr: CharAttribution, +): void { + if (k === 0) return; + let lo = findFirstRunEndingAfter(runs, p); + + if (lo < runs.length && runs[lo].start < p) { + const r = runs[lo]; + runs.splice(lo + 1, 0, { start: p, end: r.end, actor: r.actor, time: r.time }); + r.end = p; + lo++; + } + + for (let i = lo; i < runs.length; i++) { + runs[i].start += k; + runs[i].end += k; + } + + runs.splice(lo, 0, { start: p, end: p + k, actor: attr.actor, time: attr.time }); + maybeMergeAt(runs, lo); +} + +function runsDelete(runs: AttributionRun[], p: number, len: number): void { + if (len === 0) return; + const endPos = p + len; + const lo = findFirstRunEndingAfter(runs, p); + + let i = lo; + while (i < runs.length && runs[i].start < endPos) { + const r = runs[i]; + if (r.start >= p && r.end <= endPos) { + runs.splice(i, 1); + } else if (r.start < p && r.end > endPos) { + 
r.end -= len; + i++; + } else if (r.start < p) { + r.end = p; + i++; + } else { + r.start = p; + r.end -= len; + i++; + } + } + + for (let j = i; j < runs.length; j++) { + runs[j].start -= len; + runs[j].end -= len; + } + + if (lo > 0 && lo <= runs.length) maybeMergeAt(runs, lo - 1); + if (lo < runs.length) maybeMergeAt(runs, lo); +} + +function maybeMergeAt(runs: AttributionRun[], i: number): void { + if (i < 0 || i >= runs.length) return; + if (i + 1 < runs.length) { + const a = runs[i]; + const b = runs[i + 1]; + if (a.end === b.start && a.actor === b.actor && a.time === b.time) { + a.end = b.end; + runs.splice(i + 1, 1); + } + } + if (i > 0) { + const a = runs[i - 1]; + const b = runs[i]; + if (a.end === b.start && a.actor === b.actor && a.time === b.time) { + a.end = b.end; + runs.splice(i, 1); + } + } +} + +function applyPatchToRuns( + runs: AttributionRun[], + patch: TextPatch, + attr: CharAttribution, +): void { + if (patch.action === 'put') { + const text = typeof patch.value === 'string' ? patch.value : ''; + runs.length = 0; + if (text.length > 0) { + runs.push({ start: 0, end: text.length, actor: attr.actor, time: attr.time }); + } + return; + } + const idx = patch.path[1]; + if (patch.action === 'splice') { + runsInsert(runs, idx, patch.value.length, attr); + } else { + runsDelete(runs, idx, patch.length ?? 
1); + } +} + +// --------------------------------------------------------------------------- +// buildRunListAttribution — full history processing +// --------------------------------------------------------------------------- + +export async function buildRunListAttribution( + handle: DocHandle, + textFieldName: string, + signal?: AbortSignal, +): Promise { + const viewable = handle as unknown as ViewableHandle; + const history = viewable.history(); + if (!history) return null; + + if (history.length === 0) { + return { runs: [], processedHeads: [], processedHistoryIndex: 0 }; + } + + const runs: AttributionRun[] = []; + let prevHeads: unknown = null; + let lastHeads: unknown[] = []; + + for (let chunkStart = 0; chunkStart < history.length; chunkStart += CHUNK_SIZE) { + await waitForIdle(); + if (signal?.aborted) return null; + + const chunkEnd = Math.min(chunkStart + CHUNK_SIZE, history.length); + for (let i = chunkStart; i < chunkEnd; i++) { + const currHeads = history[i]; + const changeHash = extractChangeHash(currHeads); + const meta = changeHash ? viewable.metadata(changeHash) : undefined; + const attribution: CharAttribution = { + actor: meta?.actor ?? 'unknown', + time: meta?.time ?? 0, + }; + + const decodedCurr = decodeHeads(currHeads as Parameters[0]); + let patches: unknown[]; + if (prevHeads === null) { + patches = diff( + viewable.doc() as Parameters[0], + [] as unknown as Heads, + decodedCurr as unknown as Heads, + ); + } else { + const decodedPrev = decodeHeads(prevHeads as Parameters[0]); + patches = diff( + viewable.doc() as Parameters[0], + decodedPrev as unknown as Heads, + decodedCurr as unknown as Heads, + ); + } + + for (const patch of patches) { + if (isTextPatch(patch, textFieldName)) { + applyPatchToRuns(runs, patch, attribution); + } + } + + prevHeads = currHeads; + lastHeads = Array.isArray(currHeads) ? 
currHeads : [currHeads]; + } + } + + return { + runs, + processedHeads: lastHeads as unknown[], + processedHistoryIndex: history.length, + }; +} + +// --------------------------------------------------------------------------- +// updateRunListAttribution — incremental (synchronous) +// --------------------------------------------------------------------------- + +export function updateRunListAttribution( + state: RunListAttribution, + handle: DocHandle, + textFieldName: string, +): RunListAttribution { + const viewable = handle as unknown as ViewableHandle; + const history = viewable.history(); + if (!history) throw new HistoryCompactedError(); + if (state.processedHistoryIndex > history.length) throw new HistoryCompactedError(); + + const runs = state.runs.map(r => ({ ...r })); + let prevHeads = state.processedHeads; + let lastHeads = state.processedHeads; + + for (let i = state.processedHistoryIndex; i < history.length; i++) { + const currHeads = history[i]; + const changeHash = extractChangeHash(currHeads); + const meta = changeHash ? viewable.metadata(changeHash) : undefined; + const attribution: CharAttribution = { + actor: meta?.actor ?? 'unknown', + time: meta?.time ?? 0, + }; + + const decodedPrev = decodeHeads(prevHeads as Parameters[0]); + const decodedCurr = decodeHeads(currHeads as Parameters[0]); + const patches = diff( + viewable.doc() as Parameters[0], + decodedPrev as unknown as Heads, + decodedCurr as unknown as Heads, + ); + + for (const patch of patches) { + if (isTextPatch(patch, textFieldName)) { + applyPatchToRuns(runs, patch, attribution); + } + } + + prevHeads = currHeads as unknown[]; + lastHeads = Array.isArray(currHeads) ? 
currHeads : [currHeads]; + } + + return { + runs, + processedHeads: lastHeads as unknown[], + processedHistoryIndex: history.length, + }; +} + +// --------------------------------------------------------------------------- +// Char → byte offset translation (for the WASM wire) +// --------------------------------------------------------------------------- + +/** + * Build a JS-char-offset → UTF-8-byte-offset map for `text`. + * + * Indexed by **UTF-16 code unit** (Automerge's splice positions), so + * surrogate-pair halves each get an entry. The map's length is + * `text.length + 1`; `map[text.length]` is the total byte count. + * + * This is the inverse direction of the prototype's `buildByteToCharMap`. + * The Rust pipeline's `SourceInfo` carries byte offsets, so producer runs + * must be byte-translated before serializing for `PreBuiltAttributionProvider`. + * + * ASCII-only docs: map is the identity. Non-ASCII docs require this + * translation for correctness — a missing translation would silently + * misattribute any range past the first multi-byte character. + */ +export function buildCharToByteMap(text: string): number[] { + const map = new Array(text.length + 1); + let byteOff = 0; + for (let i = 0; i < text.length; i++) { + map[i] = byteOff; + const ch = text.charCodeAt(i); + // Surrogate pair (4-byte UTF-8) — first half here, low half on next iter. + if (ch >= 0xd800 && ch <= 0xdbff && i + 1 < text.length) { + const low = text.charCodeAt(i + 1); + if (low >= 0xdc00 && low <= 0xdfff) { + byteOff += 4; + i++; // skip low surrogate index in the outer loop + map[i] = byteOff; // boundary entry: low-surrogate index points past the 4 bytes + continue; + } + } + if (ch < 0x80) byteOff += 1; + else if (ch < 0x800) byteOff += 2; + else byteOff += 3; + } + map[text.length] = byteOff; + return map; +} + +/** + * Translate a `runs[]` slice from char offsets to byte offsets using the + * map produced by `buildCharToByteMap`. 
Returns a fresh array — the + * input is not mutated. + */ +export function runsCharToByteOffsets( + runs: AttributionRun[], + charToByte: number[], +): AttributionRun[] { + return runs.map(r => ({ + start: charToByte[r.start] ?? r.start, + end: charToByte[r.end] ?? r.end, + actor: r.actor, + time: r.time, + })); +} + +// Exposed for tests. +export const __internal = { runsInsert, runsDelete, applyPatchToRuns, findFirstRunEndingAfter }; diff --git a/hub-client/src/services/preferences/index.ts b/hub-client/src/services/preferences/index.ts index 41134989d..b4873a548 100644 --- a/hub-client/src/services/preferences/index.ts +++ b/hub-client/src/services/preferences/index.ts @@ -33,6 +33,13 @@ export function getPreference( /** * Update a single preference value. * Cannot update the version field. + * + * Dispatches `PREFERENCE_CHANGE_EVENT` so every `usePreference(key)` + * instance in this window can re-read the new value. The browser's + * native `storage` event only fires in *other* windows, so a custom + * same-window event is required for in-page reactivity (toggling in + * SettingsTab and having ReactPreview re-render without a manual + * page refresh). */ export function setPreference( key: K, @@ -41,4 +48,20 @@ export function setPreference( const current = getPreferences(); const updated = { ...current, [key]: value }; localStorage.setItem(STORAGE_KEY, JSON.stringify(updated)); + if (typeof window !== 'undefined') { + window.dispatchEvent( + new CustomEvent(PREFERENCE_CHANGE_EVENT, { detail: { key } }), + ); + } +} + +/** + * Name of the same-window CustomEvent dispatched by [`setPreference`]. + * `detail.key` is the [`PreferenceKey`] that changed; consumers + * filter on it. 
+ */ +export const PREFERENCE_CHANGE_EVENT = 'quarto-hub:preference-change'; + +export interface PreferenceChangeDetail { + key: PreferenceKey; } diff --git a/hub-client/src/services/preferences/schema.ts b/hub-client/src/services/preferences/schema.ts index eaea9ab10..cafbec6fa 100644 --- a/hub-client/src/services/preferences/schema.ts +++ b/hub-client/src/services/preferences/schema.ts @@ -10,6 +10,15 @@ export const UserPreferencesSchema = z.object({ scrollSyncEnabled: z.boolean(), errorOverlayCollapsed: z.boolean(), colorScheme: ColorSchemeSchema, + // Authorship overlay (Phase 5c). Off by default — colours node + // borders/labels in the q2-debug preview by their last-touch + // Automerge actor, with display name + colour resolved by + // `useAttribution` (replay + fnv1a fallback) and pre-baked into + // `astContext.attribution` / `astContext.attributionActors` by the + // Rust render transform. `.default(false)` so localStorage entries + // written before this key existed don't fail validation and reset + // every other preference. + attributionEnabled: z.boolean().default(false), }); // Infer TypeScript type from schema @@ -24,6 +33,7 @@ export const DEFAULT_PREFERENCES: UserPreferences = { scrollSyncEnabled: true, errorOverlayCollapsed: true, // collapsed by default colorScheme: 'auto', + attributionEnabled: false, // opt-in surfacing of author identities }; // Validation function - returns valid preferences or defaults diff --git a/hub-client/src/services/wasmRenderer.ts b/hub-client/src/services/wasmRenderer.ts index dc30ca423..b38e15613 100644 --- a/hub-client/src/services/wasmRenderer.ts +++ b/hub-client/src/services/wasmRenderer.ts @@ -53,10 +53,26 @@ interface WasmModuleExtended { path: string, user_grammars?: unknown, ) => Promise; + // Attribution-aware sibling. When `attribution_json` is `undefined`, + // behaviour is byte-identical to `render_page_in_project` (which + // is a one-line wrapper that forwards `None`). 
When + // populated, the active-page `RenderContext` gets a + // `PreBuiltAttributionProvider` installed, the q2-preview transform + // pipeline's attribution stages fire, and the returned AST JSON + // carries `astContext.attribution` + `astContext.attributionActors`. + render_page_in_project_with_attribution: ( + path: string, + user_grammars?: unknown, + attribution_json?: string, + ) => Promise; get_builtin_template: (name: string) => string; get_project_choices: () => string; create_project: (choiceId: string, title: string) => Promise; parse_qmd_to_ast: (content: string) => Promise; + parse_qmd_to_ast_with_attribution: ( + content: string, + attribution_json: string | undefined, + ) => Promise; write_qmd: (astJson: string) => Promise; incremental_write_qmd(original_qmd: string, new_ast_json: string): string; convert: (document: string, inputFormat: string, outputFormat: string) => Promise; @@ -396,9 +412,48 @@ export async function renderQmd( export async function renderPageInProject( path: string, userGrammars?: unknown, +): Promise { + return renderPageInProjectWithAttribution(path, userGrammars, null); +} + +/** + * Render a single page **in the context of its surrounding project**, + * optionally with attribution data. + * + * Q2-preview sibling of `parseQmdToAstWithAttribution`. When + * `attributionJson` is non-null, the JSON string is shipped to the + * Rust `PreBuiltAttributionProvider` installed on the active page's + * `RenderContext`; the q2-preview transform pipeline's attribution + * stages fire and the returned AST JSON carries + * `astContext.attribution` (`{s, actor, time}` records) and + * `astContext.attributionActors` (actor → `{name, color}` table). + * The `` component picks these keys up automatically and + * threads them through `AttributionLookupContext`, which the + * leaf renderers consume to paint per-author backgrounds and + * provide author tooltips. 
+ * + * When `null`, the call is byte-identical to + * `renderPageInProject(path, userGrammars)` — same code path, no + * provider installed, no transforms surface attribution data. + * + * The producer of `attributionJson` is responsible for satisfying + * the Phase 6 invariant: every actor referenced in `runs` must have + * an entry in `identities`. See `useAttribution` for the hub-client + * producer that builds this payload from Automerge history. + */ +export async function renderPageInProjectWithAttribution( + path: string, + userGrammars: unknown, + attributionJson: string | null, ): Promise { const wasm = getWasm(); - return JSON.parse(await wasm.render_page_in_project(path, userGrammars)); + return JSON.parse( + await wasm.render_page_in_project_with_attribution( + path, + userGrammars, + attributionJson ?? undefined, + ), + ); } /** @@ -489,11 +544,38 @@ export interface ConvertResult { */ export async function parseQmdToAst( qmdContent: string +): Promise { + return parseQmdToAstWithAttribution(qmdContent, null); +} + +/** + * Parse QMD content to Pandoc AST JSON, optionally with attribution. + * + * When `attributionJson` is non-null, the JSON string is shipped to the + * Rust `PreBuiltAttributionProvider`, which decodes runs + identities + * and drives `AttributionGenerateTransform` + `AttributionRenderTransform`. + * The resulting AST carries `astContext.attribution` (sparse `{s, actor, + * time}` records) and `astContext.attributionActors` (actor → + * `{name, color}` table). When `null`, the call is byte-identical to + * `parseQmdToAst(content)` — same code path, no provider installed, + * no transforms fire (Phase 0 test #10 contract). + * + * The producer of `attributionJson` is responsible for satisfying the + * Phase 6 invariant: every actor referenced in `runs` must have an + * entry in `identities`. See `useAttribution` for the hub-client + * producer that builds this payload from Automerge history. 
+ */ +export async function parseQmdToAstWithAttribution( + qmdContent: string, + attributionJson: string | null, ): Promise { try { await initWasm(); const wasm = getWasm(); - const responseJson = await wasm.parse_qmd_to_ast(qmdContent); + const responseJson = await wasm.parse_qmd_to_ast_with_attribution( + qmdContent, + attributionJson ?? undefined, + ); const response: ParseResult = JSON.parse(responseJson); @@ -504,7 +586,6 @@ export async function parseQmdToAst( warnings: response.warnings, }; } else { - // Extract error message const errorMsg = response.error || 'Unknown parse error'; return { diff --git a/hub-client/src/utils/palette.test.ts b/hub-client/src/utils/palette.test.ts new file mode 100644 index 000000000..760bddd99 --- /dev/null +++ b/hub-client/src/utils/palette.test.ts @@ -0,0 +1,39 @@ +import { describe, it, expect } from 'vitest'; + +import { actorColor, fnv1aHex8 } from './palette'; + +// Drift-mitigation: bit-for-bit parity with the Rust siblings in +// `crates/quarto-core/src/attribution/palette.rs`. A divergence here +// is a producer/consumer drift bug; the rendered colour for a given +// actor would no longer match between the replay drawer and the +// Authorship overlay. + +describe('actorColor', () => { + it('matches the Rust `actor_color` formula for known inputs', () => { + // parseInt("aabbcc", 16) = 0xaabbcc = 11_189_196; % 10 = 6; + // TOL_MUTED[6] = teal. + expect(actorColor('aabbccdd')).toBe('#44AA99'); + // parseInt("000000", 16) = 0; % 10 = 0; TOL_MUTED[0] = rose. + expect(actorColor('00000000')).toBe('#CC6677'); + }); + + it('handles empty and non-hex input (NaN -> index 0)', () => { + expect(actorColor('')).toBe('#CC6677'); + expect(actorColor('zzz')).toBe('#CC6677'); + }); +}); + +describe('fnv1aHex8', () => { + it('matches Rust FNV-1a 32-bit reference vectors bit-for-bit', () => { + // Same vectors pinned in palette.rs::tests::fnv1a_hex8_known_vectors. 
+ expect(fnv1aHex8('')).toBe('811c9dc5'); + expect(fnv1aHex8('a')).toBe('e40c292c'); + expect(fnv1aHex8('foobar')).toBe('bf9cf968'); + }); + + it('emits an 8-char lowercase hex string', () => { + const h = fnv1aHex8('alice@example.com'); + expect(h).toHaveLength(8); + expect(h).toMatch(/^[0-9a-f]{8}$/); + }); +}); diff --git a/hub-client/src/utils/palette.ts b/hub-client/src/utils/palette.ts new file mode 100644 index 000000000..d6b657d57 --- /dev/null +++ b/hub-client/src/utils/palette.ts @@ -0,0 +1,84 @@ +/** + * Deterministic actor-colour helpers shared between the replay drawer + * (`useReplayMode` / `ReplayDrawer`) and the attribution producer + * (`useAttribution`). Both must produce identical visual output for the + * same actor — colours seen during replay must match colours seen on + * Authorship overlays. + * + * **Drift discipline.** Both functions MUST stay bit-for-bit identical + * with their Rust siblings in + * `crates/quarto-core/src/attribution/palette.rs`. The Rust side + * colours git-blame author emails (via `--attribution=git`); this TS + * side colours Automerge actor IDs. They share `actor_color` / + * `actorColor` and `fnv1a_hex8` / `fnv1aHex8` definitions; the test + * suite in `palette.test.ts` pins reference vectors that match + * `palette.rs::tests`. + */ + +/** + * Tol Muted — a 10-colour qualitative, colour-blind-safe palette by + * Paul Tol. Reproduced from "Notes on colour schemes" + * (https://sronpersonalpages.nl/~pault/) as factual data; see the + * linked notes for the design rationale. Ordering matches Tol's + * canonical sequence so the same actor hash lands on the same name + * across libraries that adopt this palette (R `khroma`, Python + * `paletteer`, etc.). + * + * MUST stay in sync with `TOL_MUTED` in the Rust sibling + * `crates/quarto-core/src/attribution/palette.rs`. 
+ */ +const TOL_MUTED: readonly string[] = [ + '#CC6677', // rose + '#332288', // indigo + '#DDCC77', // sand + '#117733', // green + '#88CCEE', // cyan + '#882255', // wine + '#44AA99', // teal + '#999933', // olive + '#AA4499', // purple + '#DDDDDD', // pale grey +] as const; + +/** + * Deterministic colour from an actor hash string. + * + * Formula: parse the first 6 hex chars of the actor ID as an integer, + * mod the palette length, index into `TOL_MUTED`. Non-hex input (or + * an empty string) collapses to index `0`. + */ +export function actorColor(actor: string): string { + const n = parseInt(actor.slice(0, 6), 16); + const idx = (Number.isNaN(n) ? 0 : n) % TOL_MUTED.length; + return TOL_MUTED[idx]; +} + +/** + * 32-bit FNV-1a hash, formatted as a left-padded 8-char hex string. + * + * Used to reduce an arbitrary actor string (e.g. an Automerge actor + * ID whose first 6 chars aren't guaranteed hex) to a hex-prefix-safe + * input for `actorColor`. Caller: the attribution producer in + * `useAttribution`, when synthesising the `(name, color)` fallback + * identity for actors with no profile metadata. + * + * Not cryptographic; deterministic and well-distributed for colour + * purposes. + */ +export function fnv1aHex8(s: string): string { + // Hash UTF-8 bytes to match Rust's `s.bytes()` iteration. JS strings + // are UTF-16 internally; using `charCodeAt` directly would diverge + // from Rust on any non-ASCII character. + const bytes = TEXT_ENCODER.encode(s); + let hash = 0x811c9dc5; + for (let i = 0; i < bytes.length; i++) { + hash ^= bytes[i]; + // Multiply by 0x01000193 (16777619), masked back into a u32 each + // iteration. `Math.imul` does 32-bit multiplication; `>>> 0` + // coerces to unsigned. 
+ hash = Math.imul(hash, 0x01000193) >>> 0; + } + return hash.toString(16).padStart(8, '0'); +} + +const TEXT_ENCODER = new TextEncoder(); diff --git a/hub-client/src/vite-env.d.ts b/hub-client/src/vite-env.d.ts index 2c9e818ed..a76df6a1e 100644 --- a/hub-client/src/vite-env.d.ts +++ b/hub-client/src/vite-env.d.ts @@ -14,3 +14,14 @@ interface ImportMeta { declare const __GIT_COMMIT_HASH__: string declare const __GIT_COMMIT_DATE__: string declare const __BUILD_TIME__: string + +/** + * Default export = the contents of `resources/attribution/viewer.css`, + * embedded at build time by `attributionViewerCssPlugin` in + * `vite.config.ts`. Shared with the CLI's + * `AttributionViewerTransform` via `include_str!`. + */ +declare module 'virtual:quarto-attribution-viewer-css' { + const content: string + export default content +} diff --git a/hub-client/vite.config.ts b/hub-client/vite.config.ts index dad3f2c61..481830c44 100644 --- a/hub-client/vite.config.ts +++ b/hub-client/vite.config.ts @@ -1,8 +1,10 @@ import { defineConfig } from 'vite' +import type { Plugin } from 'vite' import react from '@vitejs/plugin-react' import wasm from 'vite-plugin-wasm' import compression from 'compression' import path from 'path' +import { readFileSync } from 'fs' import { execSync } from 'child_process' function getGitInfo() { @@ -17,6 +19,36 @@ function getGitInfo() { const gitInfo = getGitInfo() +/** + * Expose `virtual:quarto-attribution-viewer-css` as a module whose + * default export is the contents of `resources/attribution/viewer.css`. + * + * `resources/attribution/viewer.css` is the single source of truth + * shared with the CLI's `AttributionViewerTransform` (via + * `include_str!`). Vite / Vitest silently return empty for `?raw` + * imports of files outside the project root even with + * `server.fs.allow: ['..']`, so the virtual-module indirection is the + * supported way to embed an out-of-tree asset's contents at + * build/test time. 
+ */ +function attributionViewerCssPlugin(): Plugin { + const VIRTUAL_ID = 'virtual:quarto-attribution-viewer-css'; + const RESOLVED_ID = '\0' + VIRTUAL_ID; + const sourcePath = path.resolve(__dirname, '../resources/attribution/viewer.css'); + return { + name: 'quarto-attribution-viewer-css', + resolveId(id) { + if (id === VIRTUAL_ID) return RESOLVED_ID; + }, + load(id) { + if (id === RESOLVED_ID) { + const css = readFileSync(sourcePath, 'utf-8'); + return `export default ${JSON.stringify(css)};`; + } + }, + }; +} + /** Hub server URL. Override with VITE_HUB_SERVER env var. */ const hubTarget = process.env.VITE_HUB_SERVER || 'http://localhost:3000'; @@ -26,6 +58,7 @@ export default defineConfig({ plugins: [ react(), wasm(), + attributionViewerCssPlugin(), { // vite preview's static-file middleware does not gzip by default, // so a cold Playwright context downloads the ~32 MB WASM uncompressed. diff --git a/resources/attribution/README.md b/resources/attribution/README.md new file mode 100644 index 000000000..597b4082a --- /dev/null +++ b/resources/attribution/README.md @@ -0,0 +1,30 @@ +# Attribution Viewer Resources + +Shared viewer CSS/JS for the per-node authorship attribution feature. + +## Contents + +- `viewer.css` — dotted underline on `[data-attr-actor]` plus the + `.q2-attr-badge` / `.q2-attr-badge-dot` / `.q2-attr-badge-time` classes + used for the hover badge. +- `viewer.js` — `mouseover` / `mouseout` listeners that build a floating + badge from the per-element `data-attr-*` attributes. + +## Consumers + +- `quarto-core`'s `AttributionViewerTransform` reads both files via + `include_str!` at compile time and injects them into + `rendered.includes.{header,after-body}` whenever attribution is + active for an HTML render (unless suppressed by YAML + `attribution: { source: git, viewer: false }`). +- The hub-client imports `viewer.css` via the `virtual:quarto-attribution-viewer-css` + Vite module and injects it through `framework/attribution.tsx`'s + `attributionStyles` export. 
+ +Edit the files in this directory rather than copies of them: a change to +`viewer.css` is picked up by both surfaces on their next build, while a +change to `viewer.js` only affects the CLI (see below). +The class names form the stable contract between the CLI's static HTML +output and the hub-client's React preview — keep them in sync. + +The JS file is intentionally CLI-only. The hub-client mounts hover +handlers through React props on component boundaries, which is a +different event-handling model from the raw DOM listeners shipped here. diff --git a/resources/attribution/viewer.css b/resources/attribution/viewer.css new file mode 100644 index 000000000..9a1cceb2c --- /dev/null +++ b/resources/attribution/viewer.css @@ -0,0 +1,38 @@ +[data-attr-actor] { + color: var(--attr-color, currentColor); + text-decoration: underline dotted; + text-decoration-color: var(--attr-color, currentColor); + text-underline-offset: 4px; + cursor: help; +} + +.q2-attr-wrap { position: relative; } + +.q2-attr-badge { + display: inline-block; + z-index: 10; + font-size: 10px; + line-height: 1; + white-space: nowrap; + padding: 2px 6px; + border-radius: 3px; + background: #fff; + border: 1px solid var(--attr-color); + color: var(--attr-color); + font-weight: 600; + pointer-events: none; +} + +.q2-attr-badge-dot { + display: inline-block; + width: 6px; + height: 6px; + border-radius: 50%; + margin-right: 3px; + vertical-align: middle; +} + +.q2-attr-badge-time { + font-weight: 400; + opacity: 0.7; +} diff --git a/resources/attribution/viewer.js b/resources/attribution/viewer.js new file mode 100644 index 000000000..042012507 --- /dev/null +++ b/resources/attribution/viewer.js @@ -0,0 +1,95 @@ +// Auto-injected by AttributionViewerTransform when --attribution=git +// (or YAML attribution: git) is active. +// +// Colour paint is render-time CSS: `viewer.css` paints +// `[data-attr-actor]` via `var(--attr-color)`, and one per-actor rule +// per render publishes that variable plus `--attr-name`. This script +// only handles the interactive part — the floating badge that +// appears on hover. 
// Identity comes from the wrapper's computed style
// (`--attr-color` / `--attr-name`); the timestamp stays per-node as
// `data-attr-time`.

(function () {
  /**
   * Render a timestamp as a coarse relative label.
   *
   * @param {number} timestamp Epoch time, in seconds or milliseconds.
   * @returns {string} 'just now', or a whole number of minutes, hours
   *   or days suffixed with 'm ago' / 'h ago' / 'd ago'.
   */
  function formatRelativeTime(timestamp) {
    var now = Date.now();
    // git blame emits seconds, Automerge emits milliseconds;
    // the 1e12 threshold distinguishes them.
    var tsMs = timestamp < 1e12 ? timestamp * 1000 : timestamp;
    var diffSec = Math.floor((now - tsMs) / 1000);
    // Negative differences (timestamp ahead of the local clock) also
    // land here.
    if (diffSec < 60) return 'just now';
    var diffMin = Math.floor(diffSec / 60);
    if (diffMin < 60) return diffMin + 'm ago';
    var diffHr = Math.floor(diffMin / 60);
    if (diffHr < 24) return diffHr + 'h ago';
    var diffDay = Math.floor(diffHr / 24);
    return diffDay + 'd ago';
  }

  /**
   * Read a CSS custom property that holds a quoted string.
   *
   * CSS string custom properties round-trip with their wrapping
   * quotes (e.g. `--attr-name: "Charlie"` returns `"Charlie"`).
   * Strip them and undo the two escapes the CSS emitter applies
   * (`\\` and `\"`). Any other content survives unchanged.
   *
   * @param {CSSStyleDeclaration} cs Computed style to read from.
   * @param {string} name Custom property name, e.g. '--attr-name'.
   * @returns {string} The unquoted value; '' when the property is unset.
   */
  function readCssString(cs, name) {
    var raw = cs.getPropertyValue(name).trim();
    if (raw.length >= 2 && raw.charAt(0) === '"' && raw.charAt(raw.length - 1) === '"') {
      raw = raw.slice(1, -1).replace(/\\"/g, '"').replace(/\\\\/g, '\\');
    }
    return raw;
  }

  /**
   * Build the floating badge element for an attributed node.
   *
   * @param {Element} leaf Element carrying `data-attr-actor`.
   * @returns {HTMLSpanElement|null} The badge, or null when the name,
   *   colour or timestamp needed to fill it in is missing or invalid.
   */
  function buildBadge(leaf) {
    var cs = window.getComputedStyle(leaf);
    var color = cs.getPropertyValue('--attr-color').trim();
    var name = readCssString(cs, '--attr-name');

    // `Number(null)` and `Number('')` are both 0, which passes
    // `Number.isFinite` and would render as a bogus 1970 timestamp.
    // Treat a missing or blank attribute as "no timestamp" instead.
    var rawTime = leaf.getAttribute('data-attr-time');
    if (rawTime === null || rawTime.trim() === '') return null;
    var time = Number(rawTime);
    if (!name || !color || !Number.isFinite(time)) return null;

    var badge = document.createElement('span');
    badge.className = 'q2-attr-badge';
    badge.style.setProperty('--attr-color', color);

    var dot = document.createElement('span');
    dot.className = 'q2-attr-badge-dot';
    dot.style.backgroundColor = color;
    badge.appendChild(dot);

    // Text nodes, not innerHTML: the author name is untrusted content.
    badge.appendChild(document.createTextNode(name + ' '));

    var timeEl = document.createElement('span');
    timeEl.className = 'q2-attr-badge-time';
    timeEl.textContent = formatRelativeTime(time);
    badge.appendChild(timeEl);

    return badge;
  }

  // The single badge currently attached to <body>, if any.
  var currentBadge = null;

  // Detach the visible badge (if any) and forget it, so a later
  // handler never acts on a node that is no longer in the document.
  function clearBadge() {
    if (currentBadge) {
      currentBadge.remove();
      currentBadge = null;
    }
  }

  document.addEventListener('mouseover', function (e) {
    // Same guard as the `mouseout` handler: the event target is not
    // guaranteed to be an Element with `closest`.
    var target = e.target;
    var leaf = target && target.closest ? target.closest('[data-attr-actor]') : null;
    if (!leaf) return;
    clearBadge();

    var badge = buildBadge(leaf);
    if (!badge) return;

    // Anchor the badge just below the hovered element, in viewport
    // coordinates (hence `position: fixed`).
    var rect = leaf.getBoundingClientRect();
    badge.style.position = 'fixed';
    badge.style.top = (rect.bottom + 2) + 'px';
    badge.style.left = rect.left + 'px';

    document.body.appendChild(badge);
    currentBadge = badge;
  });

  document.addEventListener('mouseout', function (e) {
    // Moving onto another attributed element: keep the badge until the
    // matching `mouseover` replaces it.
    var related = e.relatedTarget;
    if (related && related.closest && related.closest('[data-attr-actor]')) {
      return;
    }
    clearBadge();
  });
})();