feat(classifier): 新增 notResource flag，AI 兜底拦表情包/dev URL/裸图片 (#21)

longsizhuo · web-flow · commit 4dbf7567edd9 · 2026-04-26T21:13:25.000+08:00
* feat(classifier): 新增 notResource flag，AI 兜底拦表情包/dev URL/裸图片等非资源链接 事故复盘：ChatBot listener 漏过表情包/GIF/自家 GitHub PR 链接，被 DeepSeek 走完分类后打 APPROVED 上架（#5/#18/#19）。listener 改了多轮黑名单（贴纸聚合站、媒体扩展名、self-org GitHub dev 子路径），但黑名单永远穷举不完。 改：ClassificationResult 加第 5 个 flag notResource，prompt 教模型识别'内容资源 vs 非资源'：表情包/贴纸/GIF/裸图片/视频音频直链/登录墙/错误页/dev 子路径（PR/issue/commit）一律 notResource=true → 走 FLAGGED 进人工待审。仓库主页、文章、论文、项目主页等正常资源全 false 放行。 兼容性：parseResponse 用 .asBoolean(false) 读 notResource，旧模型/旧 cache 缺字段时降级为 false，不阻拦正常分享。flags map 多带一个 key，前端展示逻辑会自然 fallthrough。 测试：+1 场景 (notResource=true → FLAGGED)，全 50 个 community.** 测试 pass。 * fix(worker): 把 notResource 真正塞进 flags Map 和 log（前一 commit Edit 漏了）
diff --git a/src/main/java/com/involutionhell/backend/community/service/ClassificationResult.java b/src/main/java/com/involutionhell/backend/community/service/ClassificationResult.java
@@ -4,11 +4,13 @@
  * DeepSeek 分类结果（M3）。
  *
  * category 已由 LinkCategory.normalize() 保证合法；
- * flags 对应 DeepSeek 返回的安全判定：
+ * flags 对应 DeepSeek 返回的安全/质量判定：
  * - nsfw：色情/暴力等不适宜内容
  * - ad：纯商业推广软文（技术公告/版本更新等不算）
  * - flame：引战/情绪化内容
  * - illegal：疑似违反中国法律法规（反动/颠覆/分裂/邪教/赌博/毒品等）
+ * - notResource：链接本身不是"可分享的内容资源"（表情包/贴纸/GIF/裸图片/
+ *                登录墙/错误页/dev PR 通知页等），客户端 listener 拦不住的兜底
  *
  * 任一 flag 为 true → worker 将 status 推到 FLAGGED（进人工复核）。
  */
@@ -17,17 +19,18 @@ public record ClassificationResult(
         boolean nsfw,
         boolean ad,
         boolean flame,
-        boolean illegal
+        boolean illegal,
+        boolean notResource
 ) {
 
-    /** 是否命中任意安全 flag。 */
+    /** 是否命中任意安全/质量 flag。 */
     public boolean anyFlagSet() {
-        return nsfw || ad || flame || illegal;
+        return nsfw || ad || flame || illegal || notResource;
     }
 
     /** 降级结果：分类为 other，flags 全 false（网络/解析等**非内容过滤**原因的失败用）。 */
     public static ClassificationResult fallback() {
-        return new ClassificationResult("other", false, false, false, false);
+        return new ClassificationResult("other", false, false, false, false, false);
     }
 
     /**
@@ -36,6 +39,6 @@ public static ClassificationResult fallback() {
      * 本系统将 illegal 置为 true 让其走 FLAGGED 进人工复核，而不是 fallback 静默放行。
      */
     public static ClassificationResult blockedByContentFilter() {
-        return new ClassificationResult("other", false, false, false, true);
+        return new ClassificationResult("other", false, false, false, true, false);
     }
 }
diff --git a/src/main/java/com/involutionhell/backend/community/service/ClassificationService.java b/src/main/java/com/involutionhell/backend/community/service/ClassificationService.java
@@ -53,7 +53,7 @@ public class ClassificationService {
             中国大陆现行法律法规。根据输入信息，把链接分到以下分类之一：
             %s
 
-            同时判断内容是否存在 4 类安全问题。对 nsfw/ad/flame 采用"宁松勿严"
+            同时判断内容是否存在 5 类问题。对 nsfw/ad/flame/notResource 采用"宁松勿严"
             策略（社群正常技术分享放行）；对 illegal 必须严格，宁可误报。
 
             - nsfw: 色情、裸露、血腥暴力、猎奇不适。仅当**明确**涉及时为 true。
@@ -67,6 +67,19 @@ public class ClassificationService {
                     新闻报道、个人作品集。
             - flame: 明显引战 / 人身攻击 / 极端言论 / 刻意煽动对立。技术路线之争、
                     理性观点分歧**不算**。
+            - notResource: 链接本身不是"可分享的内容资源"（不是色情/广告/引战，
+                    只是没有信息价值，不该上架到社群分享库）。任一命中即 true：
+                    · 表情包 / 贴纸 / GIF（tenor / klipy / giphy / 微博表情等）
+                    · 单张图片 / 截图 / 头像（孤立的纯图片页面，非文章配图）
+                    · 视频/音频/媒体文件直链（路径以 .mp4 .mp3 .gif 等结尾）
+                    · 登录墙 / 错误页 / 验证码 / 404 / 维护页（OG 抓不到正文）
+                    · 内部 dev 通知页（GitHub PR/Issue/Commit、Jira 工单、CI 报告）
+                    · 空白页 / 广告聚合页 / 跳转中转页
+                    **反例（全部 false）**：技术博客文章、论文、开源项目主页（README）、
+                    新闻报道、文档教程、知乎/小红书/微博正常帖子、视频教程页
+                    （含正文/字幕的播放页，不是裸 .mp4 文件）。
+                    注意：仓库主页（如 github.com/foo/bar）允许，dev 子路径
+                    （github.com/foo/bar/pull/123）才命中本规则。
             - illegal: 疑似违反中国大陆法律法规的内容。任一命中即 true：
                     · 反对宪法基本原则、颠覆国家政权、煽动分裂国家、破坏国家统一
                     · 攻击党和政府、宣扬港独 / 台独 / 藏独 / 疆独
@@ -80,7 +93,7 @@ public class ClassificationService {
                     技术讨论涉及敏感话题但论点中立且学术讨论 **不算** illegal。
 
             严格只返回 JSON，不要任何解释、代码块标记（不要 ```json）或其他文字：
-            {"category": "<slug>", "nsfw": false, "ad": false, "flame": false, "illegal": false}
+            {"category": "<slug>", "nsfw": false, "ad": false, "flame": false, "illegal": false, "notResource": false}
             """;
 
     private final HttpClient httpClient;
@@ -249,19 +262,20 @@ ClassificationResult parseResponse(String responseBody, String host) {
             boolean nsfw    = result.path("nsfw").asBoolean(false);
             boolean ad      = result.path("ad").asBoolean(false);
             boolean flame   = result.path("flame").asBoolean(false);
-            // 旧模型可能不返回 illegal 字段，缺失时按 false 降级（不阻拦），
-            // 命中 nsfw/ad/flame 任一时已经会走 FLAGGED
-            boolean illegal = result.path("illegal").asBoolean(false);
+            // Older models may omit illegal / notResource; a missing field reads as false (fail-open).
+            // Such a link is FLAGGED only if some other flag is set; otherwise it passes as before.
+            boolean illegal     = result.path("illegal").asBoolean(false);
+            boolean notResource = result.path("notResource").asBoolean(false);
 
             // normalize 兜底：非法 slug 转 other
             String category = LinkCategory.normalize(rawCategory);
             if (!category.equals(rawCategory)) {
                 log.warn("classification 返回非法分类，降级为 other: host={} raw={}", host, rawCategory);
             }
 
-            log.debug("classification 完成: host={} category={} nsfw={} ad={} flame={} illegal={}",
-                    host, category, nsfw, ad, flame, illegal);
-            return new ClassificationResult(category, nsfw, ad, flame, illegal);
+            log.debug("classification 完成: host={} category={} nsfw={} ad={} flame={} illegal={} notResource={}",
+                    host, category, nsfw, ad, flame, illegal, notResource);
+            return new ClassificationResult(category, nsfw, ad, flame, illegal, notResource);
 
         } catch (Exception e) {
             log.warn("classification 响应解析失败，降级: host={} error={}", host, e.getMessage());
diff --git a/src/main/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorker.java b/src/main/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorker.java
@@ -110,19 +110,20 @@ private void doEnrich(Long linkId) {
         if (cls.anyFlagSet()) {
             // 任一安全 flag 命中 → FLAGGED，进人工待审
             finalStatus = SharedLinkStatus.FLAGGED;
-            log.info("enrichment 标记 FLAGGED: linkId={} nsfw={} ad={} flame={} illegal={}",
-                    linkId, cls.nsfw(), cls.ad(), cls.flame(), cls.illegal());
+            log.info("enrichment 标记 FLAGGED: linkId={} nsfw={} ad={} flame={} illegal={} notResource={}",
+                    linkId, cls.nsfw(), cls.ad(), cls.flame(), cls.illegal(), cls.notResource());
         } else {
             finalStatus = SharedLinkStatus.APPROVED;
             log.info("enrichment AI 放行 APPROVED: linkId={} host={}", linkId, host);
         }
 
         // ── 步骤 4：回填数据库 ───────────────────────────────────────────
         Map<String, Boolean> flags = Map.of(
-                "nsfw",    cls.nsfw(),
-                "ad",      cls.ad(),
-                "flame",   cls.flame(),
-                "illegal", cls.illegal()
+                "nsfw",        cls.nsfw(),
+                "ad",          cls.ad(),
+                "flame",       cls.flame(),
+                "illegal",     cls.illegal(),
+                "notResource", cls.notResource()
         );
 
         sharedLinkService.enrich(
@@ -157,7 +158,7 @@ private void tryFallbackStatus(Long linkId) {
                     null, null, null, null,
                     "enrichment worker 未捕获异常，降级",
                     "other",
-                    Map.of("nsfw", false, "ad", false, "flame", false),
+                    Map.of("nsfw", false, "ad", false, "flame", false, "illegal", false, "notResource", false),
                     SharedLinkStatus.PENDING_MANUAL
             );
             log.info("enrichment 降级完成: linkId={} -> PENDING_MANUAL", linkId);
diff --git a/src/test/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorkerTests.java b/src/test/java/com/involutionhell/backend/community/service/SharedLinkEnrichmentWorkerTests.java
@@ -74,7 +74,7 @@ void enrich_nonFlagged_doesNotFireWebhook() {
         when(ogFetchService.fetch(anyString())).thenReturn(
                 new OgFetchResult("标题", null, null, null, null));
         when(classificationService.classify(any(), any(), any())).thenReturn(
-                new ClassificationResult("other", false, false, false, false));
+                new ClassificationResult("other", false, false, false, false, false));
 
         worker.enrich(100L);
 
@@ -91,7 +91,7 @@ void enrich_whitelistDomain_noFlags_statusBecomesApproved() {
         when(ogFetchService.fetch(anyString())).thenReturn(
                 new OgFetchResult("标题", "描述", "https://cover.jpg", "某公众号", null));
         when(classificationService.classify(anyString(), anyString(), anyString())).thenReturn(
-                new ClassificationResult("engineering", false, false, false, false));
+                new ClassificationResult("engineering", false, false, false, false, false));
 
         worker.enrich(1L);
 
@@ -115,7 +115,7 @@ void enrich_nonWhitelistDomain_noFlags_statusBecomesApproved_afterSimplification
         when(ogFetchService.fetch(anyString())).thenReturn(
                 new OgFetchResult("非白名单文章", null, null, null, null));
         when(classificationService.classify(any(), any(), any())).thenReturn(
-                new ClassificationResult("other", false, false, false, false));
+                new ClassificationResult("other", false, false, false, false, false));
 
         worker.enrich(2L);
 
@@ -136,7 +136,7 @@ void enrich_flaggedByAd_statusBecomesFlagged_regardlessOfWhitelist() {
         when(ogFetchService.fetch(anyString())).thenReturn(
                 new OgFetchResult("限时特卖！", "买一送一", null, null, null));
         when(classificationService.classify(any(), any(), any())).thenReturn(
-                new ClassificationResult("other", false, true, false, false)); // ad=true
+                new ClassificationResult("other", false, true, false, false, false)); // ad=true
 
         worker.enrich(3L);
 
@@ -156,7 +156,7 @@ void enrich_nsfwFlag_statusBecomesFlagged() {
         when(ogFetchService.fetch(anyString())).thenReturn(
                 new OgFetchResult("问题标题", null, null, null, null));
         when(classificationService.classify(any(), any(), any())).thenReturn(
-                new ClassificationResult("lifestyle", true, false, false, false)); // nsfw=true
+                new ClassificationResult("lifestyle", true, false, false, false, false)); // nsfw=true
 
         worker.enrich(4L);
 
@@ -178,7 +178,7 @@ void enrich_ogFetchFails_stillCompletesEnrichment() {
         when(ogFetchService.fetch(anyString())).thenReturn(
                 OgFetchResult.failure("HTTP 403"));
         when(classificationService.classify(isNull(), isNull(), eq(host))).thenReturn(
-                new ClassificationResult("other", false, false, false, false));
+                new ClassificationResult("other", false, false, false, false, false));
 
         worker.enrich(5L);
 
@@ -236,7 +236,7 @@ void enrich_flameFlag_flagsMapContainsCorrectValues() {
         when(ogFetchService.fetch(anyString())).thenReturn(
                 new OgFetchResult("引战标题", null, null, null, null));
         when(classificationService.classify(any(), any(), any())).thenReturn(
-                new ClassificationResult("industry", false, false, true, false)); // flame=true
+                new ClassificationResult("industry", false, false, true, false, false)); // flame=true
 
         worker.enrich(7L);
 
@@ -251,4 +251,32 @@ void enrich_flameFlag_flagsMapContainsCorrectValues() {
         assertThat(flags.get("ad")).isFalse();
         assertThat(flags.get("flame")).isTrue();
     }
+
+    // ── 场景 8：notResource=true → FLAGGED（兜底拦表情包/裸图片/dev URL） ────
+
+    @Test
+    void enrich_notResourceFlag_routesToFlagged() {
+        String host = "klipy.com";
+        SharedLink link = stubLink(8L, "https://klipy.com/gifs/hello-1234", host);
+        when(sharedLinkService.findById(8L)).thenReturn(Optional.of(link));
+        when(ogFetchService.fetch(anyString())).thenReturn(
+                new OgFetchResult(null, null, null, null, null));
+        when(classificationService.classify(any(), any(), any())).thenReturn(
+                new ClassificationResult("other", false, false, false, false, true)); // notResource=true
+
+        worker.enrich(8L);
+
+        @SuppressWarnings("unchecked")
+        ArgumentCaptor<Map<String, Boolean>> flagsCaptor = ArgumentCaptor.forClass(Map.class);
+        verify(sharedLinkService).enrich(eq(8L),
+                any(), any(), any(), any(), any(),
+                any(), flagsCaptor.capture(), eq(SharedLinkStatus.FLAGGED));
+
+        Map<String, Boolean> flags = flagsCaptor.getValue();
+        assertThat(flags.get("nsfw")).isFalse();
+        assertThat(flags.get("ad")).isFalse();
+        assertThat(flags.get("flame")).isFalse();
+        assertThat(flags.get("illegal")).isFalse();
+        assertThat(flags.get("notResource")).isTrue();
+    }
 }