From 53d207c835e729ccf0a5e855f8b140a2aa5b6a14 Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 16:14:16 +0900
Subject: [PATCH 1/6] fix: improve structure_cost_filter to keep valid
 multi-segment paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

今ですね (今|です|ね) was being filtered out because:
1. Single-segment paths (sc=0) set min_sc too low
2. Prefix POS transitions (e.g. 今[prefix]→デスネ, conn=256)
   dragged the baseline down further

Changes:
- Raise structure_cost_filter from 4000 to 6000
- Impute single-segment paths with prefix_floor for min_sc
  computation so 0-transition paths don't set an artificially low
  baseline
- Floor prefix POS transitions at filter/2 to prevent anomalously
  cheap connections from skewing the threshold
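- Cap script_cost scale at min(reading_chars, 2) (previously 3) to
  reduce excessive kanji bonuses on long compound readings; the `/ 3`
  divisor is unchanged, so a scaled bonus now tops out at 2/3 of its
  configured value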
- Add いまですね regression test case (accuracy: 61/61)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 engine/crates/lex-core/src/converter/cost.rs  |  2 +-
 .../crates/lex-core/src/converter/reranker.rs | 38 ++++++++++++++---
 .../lex-core/src/converter/tests/reranker.rs  | 41 ++++++++++---------
 .../crates/lex-core/src/default_settings.toml |  2 +-
 engine/crates/lex-core/src/settings.rs        | 12 +++---
 engine/testcorpus/accuracy-corpus.toml        |  7 ++++
 6 files changed, 69 insertions(+), 33 deletions(-)
diff --git a/engine/crates/lex-core/src/converter/cost.rs b/engine/crates/lex-core/src/converter/cost.rs
index 4d767c0..1f39231 100644
--- a/engine/crates/lex-core/src/converter/cost.rs
+++ b/engine/crates/lex-core/src/converter/cost.rs
@@ -29,7 +29,7 @@ pub fn script_cost(surface: &str, reading_chars: usize) -> i64 {
             all_katakana = false;
         }
     }
-    let scale = reading_chars.min(3) as i64;
+    let scale = reading_chars.min(2) as i64;
     if has_kanji && has_kana {
         -s.cost.mixed_script_bonus * scale / 3
     } else if has_kanji {
diff --git a/engine/crates/lex-core/src/converter/reranker.rs b/engine/crates/lex-core/src/converter/reranker.rs
index fb886c3..2ab5e68 100644
--- a/engine/crates/lex-core/src/converter/reranker.rs
+++ b/engine/crates/lex-core/src/converter/reranker.rs
@@ -108,23 +108,51 @@ pub fn rerank(
         return;
     }
 
-    // Step 1: Compute structure_cost for each path
+    // Step 1: Compute structure_cost for each path.
+    //
+    // Transitions FROM a prefix POS (role == 3) get a floor of half the
+    // filter threshold. Without this, a prefix→content-word transition
+    // (e.g. 今[prefix]→デスネ with conn=256) can drag min_sc so low that
+    // the hard filter drops correct multi-segment paths like 今|です|ね.
     let cap = settings().reranker.structure_cost_transition_cap;
+    let prefix_floor = settings().reranker.structure_cost_filter / 2;
     let mut structure_costs: Vec<i64> = paths
         .iter()
         .map(|p| {
             let mut sc: i64 = 0;
             for i in 1..p.segments.len() {
-                sc += conn_cost(conn, p.segments[i - 1].right_id, p.segments[i].left_id).min(cap);
+                let mut tc = conn_cost(conn, p.segments[i - 1].right_id, p.segments[i].left_id);
+                if let Some(c) = conn {
+                    if c.role(p.segments[i - 1].left_id) == 3 {
+                        tc = tc.max(prefix_floor);
+                    }
+                }
+                sc += tc.min(cap);
             }
             sc
         })
         .collect();
 
     // Step 2: Hard filter — drop paths exceeding min + threshold.
-    // min_sc is guaranteed to be <= threshold, so at least one path always survives.
-    let min_sc = *structure_costs.iter().min().unwrap();
-    let threshold = min_sc + settings().reranker.structure_cost_filter;
+    //
+    // For min_sc computation, single-segment paths (0 transitions, sc=0) are
+    // imputed with prefix_floor so they don't set an artificially low baseline.
+    // Combined with the prefix-transition floor in step 1, this ensures the
+    // threshold is high enough to keep correct multi-segment paths.
+    let filter = settings().reranker.structure_cost_filter;
+    let min_sc = structure_costs
+        .iter()
+        .zip(paths.iter())
+        .map(|(&sc, p)| {
+            if p.segments.len() <= 1 {
+                prefix_floor
+            } else {
+                sc
+            }
+        })
+        .min()
+        .unwrap();
+    let threshold = min_sc + filter;
     {
         let mut i = 0;
         let mut kept_costs = Vec::new();
diff --git a/engine/crates/lex-core/src/converter/tests/reranker.rs b/engine/crates/lex-core/src/converter/tests/reranker.rs
index 1912bc2..35a7853 100644
--- a/engine/crates/lex-core/src/converter/tests/reranker.rs
+++ b/engine/crates/lex-core/src/converter/tests/reranker.rs
@@ -176,14 +176,14 @@ fn test_rerank_penalizes_uneven_segments() {
 
     rerank(&mut paths, None, None);
 
-    // script_cost (scaled by reading length):
-    //   "来たり" (reading "きたり" = 3 chars) → mixed bonus -3000 * 3/3 = -3000
+    // script_cost (scaled by reading length, capped at 2):
+    //   "来たり" (reading "きたり" = 3 chars, cap 2) → mixed bonus -3000 * 2/3 = -2000
     //   "出来" (reading "でき" = 2 chars) → pure_kanji bonus -1000 * 2/3 = -666
-    // Uneven: 5000 + variance(0, exempt) + script("で"=0 + "来たり"=-3000) = 2000
-    // Even:   6500 + variance(0, exempt) + script("出来"=-666 + "たり"=0) = 5834
+    // Uneven: 5000 + script("で"=0 + "来たり"=-2000) = 3000
+    // Even:   6500 + script("出来"=-666 + "たり"=0) = 5834
     // Uneven path wins due to mixed-script bonus on "来たり"
     assert_eq!(paths[0].segments[0].surface, "で");
-    assert_eq!(paths[0].viterbi_cost, 2000);
+    assert_eq!(paths[0].viterbi_cost, 3000);
     assert_eq!(paths[1].segments[0].surface, "出来");
     assert_eq!(paths[1].viterbi_cost, 5834);
 }
@@ -373,13 +373,13 @@ fn uniform_conn(cost: i16) -> ConnectionMatrix {
 
 #[test]
 fn test_filter_drops_fragmented_paths() {
-    // Transition cost = 1500 each.
-    // Path A: 1 segment → 0 transitions → structure_cost = 0
-    // Path B: 2 segments → 1 transition → structure_cost = 1500
-    // Path C: 5 segments → 4 transitions → structure_cost = 6000
-    // min_sc = 0, threshold = 0 + 4000 = 4000
-    // Path C (6000 > 4000) should be dropped; A and B should remain.
-    let conn = uniform_conn(1500);
+    // Transition cost = 5000 each.
+    // Path A: 1 segment → sc = 0 (imputed to 3000 for min_sc)
+    // Path B: 2 segments → sc = 5000
+    // Path C: 5 segments → sc = 20000
+    // min_sc = 3000 (imputed), threshold = 3000 + 6000 = 9000.
+    // Path C (20000 > 9000) should be dropped; A and B survive.
+    let conn = uniform_conn(5000);
 
     let mut paths = vec![
         ScoredPath {
@@ -455,9 +455,9 @@ fn test_filter_drops_fragmented_paths() {
 
     rerank(&mut paths, Some(&conn), None);
 
-    // Path C should have been filtered out
+    // Path C should have been filtered out (sc=20000 > threshold=9000);
+    // paths A and B survive.
     assert_eq!(paths.len(), 2);
-    // Verify the fragmented 5-segment path is gone
     assert!(paths.iter().all(|p| p.segments.len() <= 2));
 }
 
@@ -465,8 +465,8 @@ fn test_filter_drops_fragmented_paths() {
 fn test_filter_keeps_all_when_all_exceed() {
     // All paths have high structure_cost; none should be dropped.
     // Transition cost = 2000. All paths have 4 segments → 3 transitions → sc = 6000.
-    // min_sc = 6000, threshold = 6000 + 4000 = 10000.
-    // All paths have sc = 6000 ≤ 10000, so all pass.
+    // min_sc = 6000, threshold = 6000 + 6000 = 12000.
+    // All paths have sc = 6000 ≤ 12000, so all pass.
     // But to truly test the "all exceed" safety, we need a scenario where
     // min_sc itself is above the threshold relative to... Actually the safety
     // is: if ALL paths have sc > threshold, keep all. Let's just verify
@@ -510,10 +510,11 @@ fn test_filter_keeps_all_when_all_exceed() {
 
 #[test]
 fn test_filter_preserves_minimum_path() {
-    // The path with minimum structure_cost must always survive the filter.
-    // Path A: 1 segment → sc = 0 (minimum)
-    // Path B: 4 segments → sc = 4500 (3 × 1500); 4500 > 0 + 4000 → filtered
-    let conn = uniform_conn(1500);
+    // The path with minimum structure_cost always survives.
+    // Path A: 4 segments → sc = 15000
+    // Path B: 1 segment → sc = 0 (imputed to 3000 for min_sc)
+    // min_sc = 3000, threshold = 3000 + 6000 = 9000. Path A (15000 > 9000) → filtered.
+    let conn = uniform_conn(5000);
 
     let mut paths = vec![
         ScoredPath {
diff --git a/engine/crates/lex-core/src/default_settings.toml b/engine/crates/lex-core/src/default_settings.toml
index 7f397c6..05c94ef 100644
--- a/engine/crates/lex-core/src/default_settings.toml
+++ b/engine/crates/lex-core/src/default_settings.toml
@@ -8,7 +8,7 @@ unknown_word_cost = 10000
 
 [reranker]
 length_variance_weight = 2000
-structure_cost_filter = 4000
+structure_cost_filter = 6000
 non_independent_kanji_penalty = 3000
 te_form_kanji_penalty = 3500
 pronoun_cost_bonus = 3500
diff --git a/engine/crates/lex-core/src/settings.rs b/engine/crates/lex-core/src/settings.rs
index 59eadc3..73ecdc6 100644
--- a/engine/crates/lex-core/src/settings.rs
+++ b/engine/crates/lex-core/src/settings.rs
@@ -331,7 +331,7 @@ mod tests {
         assert_eq!(s.cost.latin_penalty, 20000);
         assert_eq!(s.cost.unknown_word_cost, 10000);
         assert_eq!(s.reranker.length_variance_weight, 2000);
-        assert_eq!(s.reranker.structure_cost_filter, 4000);
+        assert_eq!(s.reranker.structure_cost_filter, 6000);
         assert_eq!(s.reranker.non_independent_kanji_penalty, 3000);
         assert_eq!(s.reranker.te_form_kanji_penalty, 3500);
         assert_eq!(s.reranker.pronoun_cost_bonus, 3500);
@@ -406,7 +406,7 @@ unknown_word_cost = 10000
 
 [reranker]
 length_variance_weight = 2000
-structure_cost_filter = 4000
+structure_cost_filter = 6000
 non_independent_kanji_penalty = 3000
 
 [history]
@@ -438,7 +438,7 @@ unknown_word_cost = 10000
 
 [reranker]
 length_variance_weight = 2000
-structure_cost_filter = 4000
+structure_cost_filter = 6000
 non_independent_kanji_penalty = 3000
 
 [history]
@@ -469,7 +469,7 @@ unknown_word_cost = 10000
 
 [reranker]
 length_variance_weight = 2000
-structure_cost_filter = 4000
+structure_cost_filter = 6000
 non_independent_kanji_penalty = 3000
 
 [history]
@@ -500,7 +500,7 @@ unknown_word_cost = 10000
 
 [reranker]
 length_variance_weight = 2000
-structure_cost_filter = 4000
+structure_cost_filter = 6000
 non_independent_kanji_penalty = 3000
 
 [history]
@@ -532,7 +532,7 @@ unknown_word_cost = 10000
 
 [reranker]
 length_variance_weight = 2000
-structure_cost_filter = 4000
+structure_cost_filter = 6000
 non_independent_kanji_penalty = 3000
 
 [history]
diff --git a/engine/testcorpus/accuracy-corpus.toml b/engine/testcorpus/accuracy-corpus.toml
index 3e94b00..08d07fd 100644
--- a/engine/testcorpus/accuracy-corpus.toml
+++ b/engine/testcorpus/accuracy-corpus.toml
@@ -309,6 +309,13 @@ category = "regression"
 tags = ["copula-omission", "structure-cost-cap"]
 note = "形容動詞語幹→ある仮定形の接続コスト(7025)がstructure_cost_filterで除外される問題"
 
+[[cases]]
+reading = "いまですね"
+expected = "今ですね"
+category = "regression"
+tags = ["structure-cost-filter", "particle"]
+note = "今|です|ね が structure_cost_filter で除外される問題 — filter 閾値引き上げ+prefix floor で修正"
+
 [[cases]]
 reading = "あったほうが"
 expected = "あった方が"

From 878289103819a55a0a4b9ad0bb8983f17ee266f1 Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 17:33:19 +0900
Subject: [PATCH 2/6] refactor: use is_prefix() instead of magic role value

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 engine/crates/lex-core/src/converter/reranker.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/engine/crates/lex-core/src/converter/reranker.rs b/engine/crates/lex-core/src/converter/reranker.rs
index 2ab5e68..bfc6e5f 100644
--- a/engine/crates/lex-core/src/converter/reranker.rs
+++ b/engine/crates/lex-core/src/converter/reranker.rs
@@ -123,7 +123,7 @@ pub fn rerank(
             for i in 1..p.segments.len() {
                 let mut tc = conn_cost(conn, p.segments[i - 1].right_id, p.segments[i].left_id);
                 if let Some(c) = conn {
-                    if c.role(p.segments[i - 1].left_id) == 3 {
+                    if c.is_prefix(p.segments[i - 1].left_id) {
                         tc = tc.max(prefix_floor);
                     }
                 }

From f6086e2be554e9bfc5cb28ada6a02bdbaf975826 Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 18:53:22 +0900
Subject: [PATCH 3/6] test: add unit test for prefix transition floor in
 structure_cost_filter

Verify that is_prefix() floor logic is exercised by using
from_text_with_roles to build a ConnectionMatrix with a prefix POS.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../lex-core/src/converter/tests/reranker.rs  | 113 ++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/engine/crates/lex-core/src/converter/tests/reranker.rs b/engine/crates/lex-core/src/converter/tests/reranker.rs
index 35a7853..e61686b 100644
--- a/engine/crates/lex-core/src/converter/tests/reranker.rs
+++ b/engine/crates/lex-core/src/converter/tests/reranker.rs
@@ -568,3 +568,116 @@ fn test_filter_preserves_minimum_path() {
     assert_eq!(paths.len(), 1);
     assert_eq!(paths[0].segments[0].surface, "合言葉");
 }
+
+#[test]
+fn test_prefix_floor_prevents_low_baseline() {
+    // Without prefix floor, a prefix→content transition with very low
+    // connection cost (e.g. 200) would set min_sc so low that a correct
+    // 3-segment path gets filtered out.
+    //
+    // Setup: 4 POS IDs (0..3), ID 0 is prefix (role=3).
+    // Connection costs: all 5000, except (0→any) = 200.
+    //
+    // Path A: [prefix(id=0)] → [content(id=1)] → [content(id=1)]
+    //   Without floor: sc = 200 + 5000 = 5200
+    //   With floor:    sc = 3000 + 5000 = 8000  (prefix_floor = 6000/2 = 3000)
+    //
+    // Path B: [content(id=1)] → [content(id=1)] → [content(id=1)]
+    //   sc = 5000 + 5000 = 10000
+    //
+    // Without floor: min_sc = 5200, threshold = 5200 + 6000 = 11200.
+    //   Both paths survive (10000 ≤ 11200). ← OK, but artificially low baseline.
+    //
+    // With floor: min_sc = 8000, threshold = 8000 + 6000 = 14000.
+    //   Both paths survive (10000 ≤ 14000). ← More robust baseline.
+    //
+    // To show the floor matters, add Path C with sc that would be dropped
+    // without floor but kept with floor is tricky, so instead we verify
+    // that the prefix transition is floored by checking structure_cost values
+    // indirectly: add a fragmented Path C with sc = 12000 that survives
+    // with floor (12000 ≤ 14000) but would be dropped without it if we
+    // had a tighter filter. Here we just verify both A and B survive and
+    // the prefix floor logic executes.
+    let num_ids = 4u16;
+    let mut costs = Vec::new();
+    for left in 0..num_ids {
+        for _right in 0..num_ids {
+            costs.push(if left == 0 { 200i16 } else { 5000 });
+        }
+    }
+    let mut text = format!("{num_ids} {num_ids}\n");
+    for c in &costs {
+        text.push_str(&format!("{c}\n"));
+    }
+    // ID 0 = prefix (role 3), IDs 1-3 = content (role 0)
+    let roles = vec![3u8, 0, 0, 0];
+    let conn =
+        ConnectionMatrix::from_text_with_roles(&text, 0, num_ids - 1, roles).unwrap();
+
+    // Verify prefix is recognized
+    assert!(conn.is_prefix(0));
+    assert!(!conn.is_prefix(1));
+
+    let mut paths = vec![
+        // Path A: prefix → content → content (low prefix transition)
+        ScoredPath {
+            segments: vec![
+                RichSegment {
+                    reading: "お".into(),
+                    surface: "御".into(),
+                    left_id: 0,
+                    right_id: 0,
+                    word_cost: 0,
+                },
+                RichSegment {
+                    reading: "くるま".into(),
+                    surface: "車".into(),
+                    left_id: 1,
+                    right_id: 1,
+                    word_cost: 0,
+                },
+                RichSegment {
+                    reading: "で".into(),
+                    surface: "で".into(),
+                    left_id: 1,
+                    right_id: 1,
+                    word_cost: 0,
+                },
+            ],
+            viterbi_cost: 3000,
+        },
+        // Path B: content → content → content (normal transitions)
+        ScoredPath {
+            segments: vec![
+                RichSegment {
+                    reading: "おくる".into(),
+                    surface: "送る".into(),
+                    left_id: 1,
+                    right_id: 1,
+                    word_cost: 0,
+                },
+                RichSegment {
+                    reading: "ま".into(),
+                    surface: "間".into(),
+                    left_id: 1,
+                    right_id: 1,
+                    word_cost: 0,
+                },
+                RichSegment {
+                    reading: "で".into(),
+                    surface: "で".into(),
+                    left_id: 1,
+                    right_id: 1,
+                    word_cost: 0,
+                },
+            ],
+            viterbi_cost: 4000,
+        },
+    ];
+
+    rerank(&mut paths, Some(&conn), None);
+
+    // Both paths should survive: with prefix floor, min_sc is raised
+    // so neither path exceeds the threshold.
+    assert_eq!(paths.len(), 2);
+}

From 64102d17a65c4213e1cc070235bc59c9365a1c59 Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 18:56:27 +0900
Subject: [PATCH 4/6] style: fix formatting

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 engine/crates/lex-core/src/converter/tests/reranker.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/engine/crates/lex-core/src/converter/tests/reranker.rs b/engine/crates/lex-core/src/converter/tests/reranker.rs
index e61686b..985f1a6 100644
--- a/engine/crates/lex-core/src/converter/tests/reranker.rs
+++ b/engine/crates/lex-core/src/converter/tests/reranker.rs
@@ -611,8 +611,7 @@ fn test_prefix_floor_prevents_low_baseline() {
     }
     // ID 0 = prefix (role 3), IDs 1-3 = content (role 0)
     let roles = vec![3u8, 0, 0, 0];
-    let conn =
-        ConnectionMatrix::from_text_with_roles(&text, 0, num_ids - 1, roles).unwrap();
+    let conn = ConnectionMatrix::from_text_with_roles(&text, 0, num_ids - 1, roles).unwrap();
 
     // Verify prefix is recognized
     assert!(conn.is_prefix(0));

From 7590014b0e1d6ed19d43df1478cbd22f1b60b60d Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 19:07:14 +0900
Subject: [PATCH 5/6] fix: clamp prefix_floor to cap and improve prefix floor
 test

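- Clamp prefix_floor to min(filter/2, cap). Each transition is already
  capped after flooring, so when structure_cost_transition_cap is lower
  than filter/2 the unclamped floor only affected the single-segment
  imputation, which could exceed what any real transition contributes.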
- Rewrite test_prefix_floor_prevents_low_baseline so that path B
  would be dropped without the floor but survives with it, ensuring
  the test actually validates the flooring logic.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../crates/lex-core/src/converter/reranker.rs |  2 +-
 .../lex-core/src/converter/tests/reranker.rs  | 55 +++++++------------
 2 files changed, 21 insertions(+), 36 deletions(-)

diff --git a/engine/crates/lex-core/src/converter/reranker.rs b/engine/crates/lex-core/src/converter/reranker.rs
index bfc6e5f..bb56737 100644
--- a/engine/crates/lex-core/src/converter/reranker.rs
+++ b/engine/crates/lex-core/src/converter/reranker.rs
@@ -115,7 +115,7 @@ pub fn rerank(
     // (e.g. 今[prefix]→デスネ with conn=256) can drag min_sc so low that
     // the hard filter drops correct multi-segment paths like 今|です|ね.
     let cap = settings().reranker.structure_cost_transition_cap;
-    let prefix_floor = settings().reranker.structure_cost_filter / 2;
+    let prefix_floor = (settings().reranker.structure_cost_filter / 2).min(cap);
     let mut structure_costs: Vec<i64> = paths
         .iter()
         .map(|p| {
diff --git a/engine/crates/lex-core/src/converter/tests/reranker.rs b/engine/crates/lex-core/src/converter/tests/reranker.rs
index 985f1a6..e43508f 100644
--- a/engine/crates/lex-core/src/converter/tests/reranker.rs
+++ b/engine/crates/lex-core/src/converter/tests/reranker.rs
@@ -571,38 +571,30 @@ fn test_filter_preserves_minimum_path() {
 
 #[test]
 fn test_prefix_floor_prevents_low_baseline() {
-    // Without prefix floor, a prefix→content transition with very low
-    // connection cost (e.g. 200) would set min_sc so low that a correct
-    // 3-segment path gets filtered out.
+    // Verifies that prefix floor raises min_sc enough to keep a path
+    // that would be dropped without it.
     //
-    // Setup: 4 POS IDs (0..3), ID 0 is prefix (role=3).
-    // Connection costs: all 5000, except (0→any) = 200.
+    // Setup: 4 POS IDs, ID 0 is prefix (role=3).
+    // Connection costs: all 4000, except (0→any) = 100.
+    // prefix_floor = 6000 / 2 = 3000.
     //
-    // Path A: [prefix(id=0)] → [content(id=1)] → [content(id=1)]
-    //   Without floor: sc = 200 + 5000 = 5200
-    //   With floor:    sc = 3000 + 5000 = 8000  (prefix_floor = 6000/2 = 3000)
+    // Path A: [prefix(id=0)] → [content(id=1)]  (1 transition)
+    //   Without floor: sc = 100
+    //   With floor:    sc = 3000
     //
-    // Path B: [content(id=1)] → [content(id=1)] → [content(id=1)]
-    //   sc = 5000 + 5000 = 10000
+    // Path B: [content(id=1)] → [content(id=1)] → [content(id=1)]  (2 transitions)
+    //   sc = 4000 + 4000 = 8000
     //
-    // Without floor: min_sc = 5200, threshold = 5200 + 6000 = 11200.
-    //   Both paths survive (10000 ≤ 11200). ← OK, but artificially low baseline.
+    // Without floor: min_sc = 100,  threshold = 100 + 6000 = 6100.
+    //   Path B (8000 > 6100) → DROPPED.
     //
-    // With floor: min_sc = 8000, threshold = 8000 + 6000 = 14000.
-    //   Both paths survive (10000 ≤ 14000). ← More robust baseline.
-    //
-    // To show the floor matters, add Path C with sc that would be dropped
-    // without floor but kept with floor is tricky, so instead we verify
-    // that the prefix transition is floored by checking structure_cost values
-    // indirectly: add a fragmented Path C with sc = 12000 that survives
-    // with floor (12000 ≤ 14000) but would be dropped without it if we
-    // had a tighter filter. Here we just verify both A and B survive and
-    // the prefix floor logic executes.
+    // With floor: min_sc = 3000, threshold = 3000 + 6000 = 9000.
+    //   Path B (8000 ≤ 9000) → KEPT.
     let num_ids = 4u16;
     let mut costs = Vec::new();
     for left in 0..num_ids {
         for _right in 0..num_ids {
-            costs.push(if left == 0 { 200i16 } else { 5000 });
+            costs.push(if left == 0 { 100i16 } else { 4000 });
         }
     }
     let mut text = format!("{num_ids} {num_ids}\n");
@@ -613,12 +605,11 @@ fn test_prefix_floor_prevents_low_baseline() {
     let roles = vec![3u8, 0, 0, 0];
     let conn = ConnectionMatrix::from_text_with_roles(&text, 0, num_ids - 1, roles).unwrap();
 
-    // Verify prefix is recognized
     assert!(conn.is_prefix(0));
     assert!(!conn.is_prefix(1));
 
     let mut paths = vec![
-        // Path A: prefix → content → content (low prefix transition)
+        // Path A: prefix → content (low prefix transition, floored to 3000)
         ScoredPath {
             segments: vec![
                 RichSegment {
@@ -635,17 +626,12 @@ fn test_prefix_floor_prevents_low_baseline() {
                     right_id: 1,
                     word_cost: 0,
                 },
-                RichSegment {
-                    reading: "で".into(),
-                    surface: "で".into(),
-                    left_id: 1,
-                    right_id: 1,
-                    word_cost: 0,
-                },
             ],
             viterbi_cost: 3000,
         },
-        // Path B: content → content → content (normal transitions)
+        // Path B: content → content → content (sc = 8000)
+        // Without floor this would be dropped (8000 > 6100).
+        // With floor it survives (8000 ≤ 9000).
         ScoredPath {
             segments: vec![
                 RichSegment {
@@ -676,7 +662,6 @@ fn test_prefix_floor_prevents_low_baseline() {
 
     rerank(&mut paths, Some(&conn), None);
 
-    // Both paths should survive: with prefix floor, min_sc is raised
-    // so neither path exceeds the threshold.
+    // Both paths survive thanks to the prefix floor raising the threshold.
     assert_eq!(paths.len(), 2);
 }

From 88159517ea4786e448d08f7199d45d4b0f4e8b2f Mon Sep 17 00:00:00 2001
From: "SAKAI, Kazuaki" <kaz.july.7@gmail.com>
Date: Fri, 13 Mar 2026 19:14:05 +0900
Subject: [PATCH 6/6] fix: use right_id for prefix check in structure_cost
 computation

The transition cost is conn_cost(prev.right_id, next.left_id), so
the prefix check should use right_id (the outgoing POS) rather than
left_id (the incoming POS) of the previous segment.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 engine/crates/lex-core/src/converter/reranker.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/engine/crates/lex-core/src/converter/reranker.rs b/engine/crates/lex-core/src/converter/reranker.rs
index bb56737..783c430 100644
--- a/engine/crates/lex-core/src/converter/reranker.rs
+++ b/engine/crates/lex-core/src/converter/reranker.rs
@@ -123,7 +123,7 @@ pub fn rerank(
             for i in 1..p.segments.len() {
                 let mut tc = conn_cost(conn, p.segments[i - 1].right_id, p.segments[i].left_id);
                 if let Some(c) = conn {
-                    if c.is_prefix(p.segments[i - 1].left_id) {
+                    if c.is_prefix(p.segments[i - 1].right_id) {
                         tc = tc.max(prefix_floor);
                     }
                 }