InvolutionHell · Crokily · Apr 17, 2026 · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/.env.sample b/.env.sample
@@ -30,6 +30,20 @@ INTERN_KEY=
 # 在 https://open.bigmodel.cn/ 注册后获取
 ZHIPU_API_KEY=
 
+# Upstash Redis（Serverless Redis over HTTP）—— 给 AI 接口做 per-IP rate limit
+# 免费模型 GLM-4.6V-Flash 并发极低（≈5），不限流单用户就能打爆。
+#
+# 获取方式（任选其一）：
+# 1. 在 https://console.upstash.com/ 手动建 Redis 库，直接复制 REST URL / Token
+# 2. Vercel Project → Integrations → Upstash → 一键绑定（env 会自动注入项目）
+#
+# 代码会自动识别以下三种命名（按优先级）：
+#   a. UPSTASH_REDIS_REST_URL / _TOKEN                 （手动建推荐）
+#   b. UPSTASH_REDIS_REST_KV_REST_API_URL / _TOKEN     （Vercel 集成 + 自定义 prefix）
+#   c. KV_REST_API_URL / _TOKEN                        （Vercel 集成 + 默认无 prefix）
+# 未配置时限流会自动降级为放行 + 一次 warn，不会阻塞接口。
+UPSTASH_REDIS_REST_URL=
+UPSTASH_REDIS_REST_TOKEN=
 # Sentry 错误监控（Developer plan 免费 5K errors / 10K perf units / 月）
 # NEXT_PUBLIC_SENTRY_DSN 是浏览器端需要的公开 DSN，暴露在前端 bundle 里属于设计，
 # SENTRY_AUTH_TOKEN 仅用于 next build 时上传 source map，私密。

diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
@@ -3,6 +3,7 @@ import { streamText, UIMessage, convertToModelMessages } from "ai";
 import { getModel, requiresApiKey, type AIProvider } from "@/lib/ai/models";
 import { buildSystemMessage } from "@/lib/ai/prompt";
 import { source } from "@/lib/source";
+import { limitChat, rateLimitResponse } from "@/lib/rate-limit";
 import fs from "fs/promises";
 import path from "path";
 
@@ -29,6 +30,22 @@ interface ChatRequest {
 import { resolveUserId } from "@/lib/server-auth";
 
 export async function POST(req: Request) {
+  // 0. Rate limit：免费模型 GLM-4.6V-Flash 并发极低（≈ 5），
+  //    单用户开几个 tab 就能打爆。per-IP 滑动窗口限流先挡一层。
+  //    （L2 防护；如果 Upstash env 漏配会自动降级为放行+warn）
+  //
+  //    预读 body 判断是否带图（hasImage=true 会触发更严的 5 req/60s 窗口）。
+  //    为此多克一次请求，后续 proxyReq/req.json() 仍可独立读（Copilot CR #4）。
+  let hasImage = false;
+  try {
+    const body = (await req.clone().json()) as Partial<ChatRequest>;
+    hasImage = messagesHaveImage(body.messages);
+  } catch {
+    // body 不是合法 JSON：按无图处理，继续让下游的 req.json() 去报真正的错
+  }
+  const rl = await limitChat(req, hasImage);
+  if (!rl.success) return rateLimitResponse(rl);
+
   // 1. 克隆请求，因为如果代理失败，后面的代码还需要读取 req.json()
   const proxyReq = req.clone();
 
@@ -234,13 +251,114 @@ export async function POST(req: Request) {
       return Response.json({ error: error.message }, { status: 400 });
     }
 
+    // 识别上游（智谱 GLM）限流/欠费/鉴权错误，给出结构化 code 让前端友好提示。
+    // 智谱业务码参考：
+    //   1302 - 接口请求并发超额（与 HTTP 429 对应）
+    //   1113 - 账户余额不足 / 免费额度耗尽
+    //   1001/1002/1003 - 鉴权失败
+    const mapped = mapUpstreamError(error);
+    if (mapped) {
+      return Response.json(
+        { error: mapped.message, code: mapped.code },
+        { status: mapped.status },
+      );
+    }
+
     return Response.json(
       { error: "Failed to process chat request" },
       { status: 500 },
     );
   }
 }
 
+/**
+ * Reports whether any UIMessage in the list carries an image part. Per the original note, AI SDK v5 shapes covered:
+ * `type === "image"` / `type === "image_url"` / `type === "file"` whose mediaType starts with `image/`.
+ * Any unexpected structure is treated as "no image": better to let a request through than to block it wrongly.
+ */
+function messagesHaveImage(messages: unknown): boolean {
+  if (!Array.isArray(messages)) return false; // not a message list -> no image
+  return messages.some((msg) => {
+    if (!msg || typeof msg !== "object") return false; // skip null / primitive entries
+    const parts = (msg as { parts?: unknown }).parts;
+    if (!Array.isArray(parts)) return false; // only the `parts` array is inspected
+    return parts.some((part) => {
+      if (!part || typeof part !== "object") return false;
+      const type = (part as { type?: unknown }).type;
+      if (type === "image" || type === "image_url") return true;
+      if (type === "file") {
+        const mediaType = (part as { mediaType?: unknown }).mediaType;
+        return typeof mediaType === "string" && mediaType.startsWith("image/"); // file parts count only when image/*
+      }
+      return false;
+    });
+  });
+}
+
+interface MappedUpstreamError { // structured description of a recognized upstream failure
+  status: number; // used as the HTTP status of the JSON error response
+  code: "rate_limited" | "quota_exhausted" | "upstream_auth" | "upstream_down"; // NOTE(review): "upstream_down" is not returned by mapUpstreamError in this diff
+  message: string; // user-facing text sent back as the `error` field
+}
+
+function mapUpstreamError(err: unknown): MappedUpstreamError | null { // classify an error by text patterns; null when nothing matches
+  if (!err) return null; // falsy error value -> nothing to map
+
+  // Match only against the message / serialized error text, never the stack: stack frames contain
+  // line numbers such as `:429:` / `:1302:` that would falsely match the code regexes (Copilot CR #5).
+  // JSON.stringify throws on circular references, hence the try/catch fallback (Copilot CR #6).
+  let raw: string;
+  if (err instanceof Error) {
+    raw = err.message; // NOTE(review): for Error instances only `message` is read; other properties are ignored
+  } else if (typeof err === "string") {
+    raw = err;
+  } else {
+    try {
+      raw = JSON.stringify(err);
+    } catch {
+      raw = String(err);
+    }
+  }
+
+  // Code regexes: where a gap is needed they use bounded `[^\s]{0,N}?` instead of `.*`, capping backtracking
+  // to avoid ReDoS (CodeQL polynomial-regex alert). The gaps used below are at most 10 non-space characters.
+  const hasStatus429 = /\b429\b|rate[-_ ]?limit|too many requests/i.test(raw);
+  const has1302 = /\b1302\b|并发超额|速率限制|控制请求频率/.test(raw);
+  const has1113 =
+    /\b1113\b|余额不足|额度[^\s]{0,10}?耗尽|quota[^\s]{0,10}?exhaust/i.test(
+      raw,
+    );
+  const hasAuth =
+    /\b1001\b|\b1002\b|\b1003\b|\b401\b|unauthorized|invalid[^\s]{0,10}?api[^\s]{0,10}?key/i.test(
+      raw,
+    );
+
+  if (has1302 || hasStatus429) { // checked first: rate limiting wins over quota / auth matches
+    return {
+      status: 429,
+      code: "rate_limited",
+      message: "AI 服务被挤爆了，排队中，请 30 秒后再试。(上游并发限流)",
+    };
+  }
+  if (has1113) { // balance / free quota exhausted
+    return {
+      status: 503,
+      code: "quota_exhausted",
+      message:
+        "免费模型今日额度已用完，请明天再来，或在设置里切到你自己的 OpenAI/Gemini。",
+    };
+  }
+  if (hasAuth) { // upstream credential problem, reported as 502 rather than 401
+    return {
+      status: 502,
+      code: "upstream_auth",
+      message:
+        "AI 服务密钥配置异常，站点管理员已收到通知。请稍后重试或切换到自有 API Key。",
+    };
+  }
+  return null; // unrecognized error: caller falls back to its generic response
+}
+
 // 提取纯文本内容，过滤掉 MDX 语法
 function extractTextFromMDX(content: string): string {
   let text = content

diff --git a/app/api/suggestions/route.ts b/app/api/suggestions/route.ts
@@ -2,6 +2,7 @@ import { generateText } from "ai";
 import { unstable_cache } from "next/cache";
 import { getModel, requiresApiKey, type AIProvider } from "@/lib/ai/models";
 import { createGlmFlashModel } from "@/lib/ai/providers/glm";
+import { limitChat, rateLimitResponse } from "@/lib/rate-limit";
 
 // 允许流式响应最长30秒
 export const maxDuration = 30;
@@ -20,6 +21,10 @@ interface SuggestionsRequest {
 }
 
 export async function POST(req: Request) {
+  // Rate limit：suggestions 也打 LLM，共用同一 IP 额度池
+  const rl = await limitChat(req, false);
+  if (!rl.success) return rateLimitResponse(rl);
+
   try {
     const {
       messages,

diff --git a/app/components/DocsAssistant.tsx b/app/components/DocsAssistant.tsx
@@ -465,11 +465,15 @@ function deriveAssistantError(
         ? message
         : `The ${providerLabel} API key looks incorrect. Update it in settings and try again.`;
   } else if (statusCode === 429) {
+    // Prefer the friendly message returned by the server (rate_limited / quota_exhausted);
+    // fall back to the default Chinese copy only when the server sent no message.
     friendlyMessage =
-      "The provider is rate limiting requests. Please wait and try again.";
+      message && message.length > 0 ? message : "请求太频繁，请稍等片刻再试。";
   } else if (statusCode && statusCode >= 500) {
     friendlyMessage =
-      "The AI provider is currently unavailable. Please try again soon.";
+      message && message.length > 0
+        ? message
+        : "AI 服务暂时不可用，请稍后再试。";
   }
 
   return {

diff --git a/app/components/assistant-ui/thread.tsx b/app/components/assistant-ui/thread.tsx
@@ -395,6 +395,8 @@ const Composer: FC<ComposerProps> = ({
           autoFocus
           aria-label="Message input"
           disabled={!hasActiveKey}
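+          // Input-level cap of 4000 characters per message (token-bomb guard for the free-model quota). NOTE(review): UI-only; confirm the server enforces its own limit.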
+          maxLength={4000}
         />
         <ComposerAction
           canSend={hasActiveKey}