diff --git a/.env.sample b/.env.sample
index 79b1dc55..e7c58b60 100644
--- a/.env.sample
+++ b/.env.sample
@@ -30,6 +30,20 @@ INTERN_KEY=
 # 在 https://open.bigmodel.cn/ 注册后获取
 ZHIPU_API_KEY=
 
+# Upstash Redis（Serverless Redis over HTTP）—— 给 AI 接口做 per-IP rate limit
+# 免费模型 GLM-4.6V-Flash 并发极低（≈5），不限流单用户就能打爆。
+#
+# 获取方式（任选其一）：
+# 1. 在 https://console.upstash.com/ 手动建 Redis 库，直接复制 REST URL / Token
+# 2. Vercel Project → Integrations → Upstash → 一键绑定（env 会自动注入项目）
+#
+# 代码会自动识别以下三种命名（按优先级）：
+#   a. UPSTASH_REDIS_REST_URL / _TOKEN                 （手动建推荐）
+#   b. UPSTASH_REDIS_REST_KV_REST_API_URL / _TOKEN     （Vercel 集成 + 自定义 prefix）
+#   c. KV_REST_API_URL / _TOKEN                        （Vercel 集成 + 默认无 prefix）
+# 未配置时限流会自动降级为放行 + 一次 warn，不会阻塞接口。
+UPSTASH_REDIS_REST_URL=
+UPSTASH_REDIS_REST_TOKEN=
 # Sentry 错误监控（Developer plan 免费 5K errors / 10K perf units / 月）
 # NEXT_PUBLIC_SENTRY_DSN 是浏览器端需要的公开 DSN，暴露在前端 bundle 里属于设计，
 # SENTRY_AUTH_TOKEN 仅用于 next build 时上传 source map，私密。
diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
index af6e1bc3..b2fc5927 100644
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -3,6 +3,7 @@ import { streamText, UIMessage, convertToModelMessages } from "ai";
 import { getModel, requiresApiKey, type AIProvider } from "@/lib/ai/models";
 import { buildSystemMessage } from "@/lib/ai/prompt";
 import { source } from "@/lib/source";
+import { limitChat, rateLimitResponse } from "@/lib/rate-limit";
 import fs from "fs/promises";
 import path from "path";
 
@@ -29,6 +30,22 @@ interface ChatRequest {
 import { resolveUserId } from "@/lib/server-auth";
 
 export async function POST(req: Request) {
+  // 0. Rate limit：免费模型 GLM-4.6V-Flash 并发极低（≈ 5），
+  //    单用户开几个 tab 就能打爆。per-IP 滑动窗口限流先挡一层。
+  //    （L2 防护；如果 Upstash env 漏配会自动降级为放行+warn）
+  //
+  //    预读 body 判断是否带图（hasImage=true 会触发更严的 5 req/60s 窗口）。
+  //    为此多克一次请求，后续 proxyReq/req.json() 仍可独立读（Copilot CR #4）。
+  let hasImage = false;
+  try {
+    const body = (await req.clone().json()) as Partial<ChatRequest>;
+    hasImage = messagesHaveImage(body.messages);
+  } catch {
+    // body 不是合法 JSON：按无图处理，继续让下游的 req.json() 去报真正的错
+  }
+  const rl = await limitChat(req, hasImage);
+  if (!rl.success) return rateLimitResponse(rl);
+
   // 1. 克隆请求，因为如果代理失败，后面的代码还需要读取 req.json()
   const proxyReq = req.clone();
 
@@ -234,6 +251,19 @@ export async function POST(req: Request) {
       return Response.json({ error: error.message }, { status: 400 });
     }
 
+    // 识别上游（智谱 GLM）限流/欠费/鉴权错误，给出结构化 code 让前端友好提示。
+    // 智谱业务码参考：
+    //   1302 - 接口请求并发超额（与 HTTP 429 对应）
+    //   1113 - 账户余额不足 / 免费额度耗尽
+    //   1001/1002/1003 - 鉴权失败
+    const mapped = mapUpstreamError(error);
+    if (mapped) {
+      return Response.json(
+        { error: mapped.message, code: mapped.code },
+        { status: mapped.status },
+      );
+    }
+
     return Response.json(
       { error: "Failed to process chat request" },
       { status: 500 },
@@ -241,6 +271,94 @@ export async function POST(req: Request) {
   }
 }
 
+/**
+ * Reports whether any message in a list of UIMessages contains an image part. Recognized part shapes:
+ * `type === "image"` / `type === "image_url"` / `type === "file"` whose mediaType starts with "image/".
+ * Any malformed structure counts as "no image": better to let a request through than to over-restrict it.
+ */
+function messagesHaveImage(messages: unknown): boolean {
+  if (!Array.isArray(messages)) return false;
+  return messages.some((msg) => {
+    if (!msg || typeof msg !== "object") return false;
+    const parts = (msg as { parts?: unknown }).parts;
+    if (!Array.isArray(parts)) return false;
+    return parts.some((part) => {
+      if (!part || typeof part !== "object") return false;
+      const type = (part as { type?: unknown }).type;
+      if (type === "image" || type === "image_url") return true;
+      if (type === "file") {
+        const mediaType = (part as { mediaType?: unknown }).mediaType;
+        return typeof mediaType === "string" && mediaType.startsWith("image/");
+      }
+      return false;
+    });
+  });
+}
+
+interface MappedUpstreamError {
+  status: number; // HTTP status code used for the response sent to the client
+  code: "rate_limited" | "quota_exhausted" | "upstream_auth" | "upstream_down";
+  message: string; // user-facing text returned in the JSON `error` field
+}
+
+/**
+ * Maps a recognizable upstream (Zhipu GLM) failure to a structured client response.
+ * @param err - value caught from the provider call (Error, string, or arbitrary payload)
+ * @returns status/code/message to send back, or null when no known pattern matches
+ */
+function mapUpstreamError(err: unknown): MappedUpstreamError | null {
+  if (!err) return null;
+  // Inspect only the message / payload, never the stack: stack line numbers such as `:429:` or
+  // `:1302:` would match the code patterns below. JSON.stringify throws on cycles, hence String().
+  let raw: string;
+  if (err instanceof Error) {
+    raw = err.message;
+  } else if (typeof err === "string") {
+    raw = err;
+  } else {
+    try {
+      raw = JSON.stringify(err);
+    } catch {
+      raw = String(err);
+    }
+  }
+
+  // Keyword gaps use a bounded `.{0,10}?` rather than `.*`, which caps backtracking (ReDoS).
+  // The gap must accept whitespace so that "quota exhausted" / "Invalid API key" are recognized.
+  const hasStatus429 = /\b429\b|rate[-_ ]?limit|too many requests/i.test(raw);
+  const has1302 = /\b1302\b|并发超额|速率限制|控制请求频率/.test(raw);
+  const has1113 =
+    /\b1113\b|余额不足|额度.{0,10}?耗尽|quota.{0,10}?exhaust/i.test(raw);
+  const hasAuth =
+    /\b1001\b|\b1002\b|\b1003\b|\b401\b|unauthorized|invalid.{0,10}?api.{0,10}?key/i.test(
+      raw,
+    );
+  if (has1302 || hasStatus429) {
+    return {
+      status: 429,
+      code: "rate_limited",
+      message: "AI 服务被挤爆了，排队中，请 30 秒后再试。(上游并发限流)",
+    };
+  }
+  if (has1113) {
+    return {
+      status: 503,
+      code: "quota_exhausted",
+      message:
+        "免费模型今日额度已用完，请明天再来，或在设置里切到你自己的 OpenAI/Gemini。",
+    };
+  }
+  if (hasAuth) {
+    return {
+      status: 502,
+      code: "upstream_auth",
+      message:
+        "AI 服务密钥配置异常，站点管理员已收到通知。请稍后重试或切换到自有 API Key。",
+    };
+  }
+  return null;
+}
+
 // 提取纯文本内容，过滤掉 MDX 语法
 function extractTextFromMDX(content: string): string {
   let text = content
diff --git a/app/api/suggestions/route.ts b/app/api/suggestions/route.ts
index 7a6c11ea..a07f1c2f 100644
--- a/app/api/suggestions/route.ts
+++ b/app/api/suggestions/route.ts
@@ -2,6 +2,7 @@ import { generateText } from "ai";
 import { unstable_cache } from "next/cache";
 import { getModel, requiresApiKey, type AIProvider } from "@/lib/ai/models";
 import { createGlmFlashModel } from "@/lib/ai/providers/glm";
+import { limitChat, rateLimitResponse } from "@/lib/rate-limit";
 
 // 允许流式响应最长30秒
 export const maxDuration = 30;
@@ -20,6 +21,10 @@ interface SuggestionsRequest {
 }
 
 export async function POST(req: Request) {
+  // Rate limit：suggestions 也打 LLM，共用同一 IP 额度池
+  const rl = await limitChat(req, false);
+  if (!rl.success) return rateLimitResponse(rl);
+
   try {
     const {
       messages,
diff --git a/app/components/DocsAssistant.tsx b/app/components/DocsAssistant.tsx
index 49d07a92..d6f4e4ab 100644
--- a/app/components/DocsAssistant.tsx
+++ b/app/components/DocsAssistant.tsx
@@ -465,11 +465,15 @@ function deriveAssistantError(
         ? message
         : `The ${providerLabel} API key looks incorrect. Update it in settings and try again.`;
   } else if (statusCode === 429) {
+    // Prefer the friendly message returned by the server (e.g. for code rate_limited);
+    // fall back to the built-in default text only when the server sent no message.
     friendlyMessage =
-      "The provider is rate limiting requests. Please wait and try again.";
+      message && message.length > 0 ? message : "请求太频繁，请稍等片刻再试。";
   } else if (statusCode && statusCode >= 500) {
     friendlyMessage =
-      "The AI provider is currently unavailable. Please try again soon.";
+      message && message.length > 0
+        ? message
+        : "AI 服务暂时不可用，请稍后再试。";
   }
 
   return {
diff --git a/app/components/assistant-ui/thread.tsx b/app/components/assistant-ui/thread.tsx
index 9dac98d9..c84d0d7e 100644
--- a/app/components/assistant-ui/thread.tsx
+++ b/app/components/assistant-ui/thread.tsx
@@ -395,6 +395,8 @@ const Composer: FC<ComposerProps> = ({
           autoFocus
           aria-label="Message input"
           disabled={!hasActiveKey}
+          // 单条消息硬上限 4000 字符：防 token bomb，保护免费模型额度
+          maxLength={4000}
         />
         <ComposerAction
           canSend={hasActiveKey}
diff --git a/lib/rate-limit.ts b/lib/rate-limit.ts
new file mode 100644
index 00000000..5fb36c75
--- /dev/null
+++ b/lib/rate-limit.ts
@@ -0,0 +1,223 @@
+/**
+ * 基于 Upstash Redis 的分布式 rate limiter，专门给 AI 相关 API 用。
+ *
+ * 背景：免费模型 GLM-4.6V-Flash 并发上限很低（≈ 5），单个用户开几个 tab
+ * 就能打爆。必须 per-IP 滑动窗口限流，阻止一个访客拖垮整个站点。
+ *
+ * 环境变量缺失时自动降级为"不限流 + 打 warn"：允许本地 dev 零配置启动，
+ * 但生产必须配齐 UPSTASH_REDIS_REST_URL / UPSTASH_REDIS_REST_TOKEN。
+ *
+ * 使用：
+ *   const result = await limitChat(req);
+ *   if (!result.success) return rateLimitResponse(result);
+ */
+import { Ratelimit } from "@upstash/ratelimit";
+import { Redis } from "@upstash/redis";
+
+// Lazily created module-level Ratelimit instances, so they are not rebuilt on every request
+let cachedChatLimiter: Ratelimit | null = null;
+let cachedChatImageLimiter: Ratelimit | null = null;
+let cachedDailyLimiter: Ratelimit | null = null;
+// The "Upstash env missing" warning is emitted only once per module lifetime,
+// so production serverless logs are not flooded on every request (Copilot CR #3)
+let hasWarnedMissingUpstash = false;
+
+/**
+ * Picks the first non-blank env var among `names`; absorbs naming differences between
+ * local development and the various Vercel integration versions.
+ * @param names - env var names in priority order
+ * @returns the trimmed value, or undefined when every candidate is unset or blank
+ */
+function firstEnv(...names: string[]): string | undefined {
+  // Trimmed so that pasted whitespace/newlines never end up in a URL or token
+  return names.map((name) => process.env[name]?.trim()).find(Boolean);
+}
+
+function getRedis(): Redis | null {
+  // Upstash env names differ by integration path; each prefix below is completed with _URL / _TOKEN:
+  //   - copied manually from the Upstash console → UPSTASH_REDIS_REST_URL / _TOKEN
+  //   - Vercel integration, prefix=UPSTASH_...   → UPSTASH_REDIS_REST_KV_REST_API_URL / _TOKEN
+  //   - Vercel integration, no custom prefix     → KV_REST_API_URL / KV_REST_API_TOKEN
+  // Prefixes are tried in that order, and URL + token are always taken from the SAME prefix so that
+  // credentials belonging to two different integrations are never combined.
+  const prefixes = [
+    "UPSTASH_REDIS_REST",
+    "UPSTASH_REDIS_REST_KV_REST_API",
+    "KV_REST_API",
+  ];
+  for (const prefix of prefixes) {
+    const url = firstEnv(`${prefix}_URL`);
+    const token = firstEnv(`${prefix}_TOKEN`);
+    if (url && token) return new Redis({ url, token });
+  }
+  return null;
+}
+
+/** Text-only chat: 10 requests per 60 s per IP */
+function getChatLimiter(): Ratelimit | null {
+  if (cachedChatLimiter) return cachedChatLimiter;
+  const redis = getRedis();
+  if (!redis) return null; // no Redis credentials available: caller decides how to degrade
+  cachedChatLimiter = new Ratelimit({
+    redis,
+    limiter: Ratelimit.slidingWindow(10, "60 s"),
+    analytics: true,
+    prefix: "ratelimit:chat:text",
+  });
+  return cachedChatLimiter;
+}
+
+/** Chat with images: 5 requests per 60 s per IP (images cost more, so the limit is tighter) */
+function getChatImageLimiter(): Ratelimit | null {
+  if (cachedChatImageLimiter) return cachedChatImageLimiter;
+  const redis = getRedis();
+  if (!redis) return null; // no Redis credentials available: caller decides how to degrade
+  cachedChatImageLimiter = new Ratelimit({
+    redis,
+    limiter: Ratelimit.slidingWindow(5, "60 s"),
+    analytics: true,
+    prefix: "ratelimit:chat:image",
+  });
+  return cachedChatImageLimiter;
+}
+
+/** Daily cap: 100 requests per 24 h per IP, guarding against slow long-tail abuse */
+function getDailyLimiter(): Ratelimit | null {
+  if (cachedDailyLimiter) return cachedDailyLimiter;
+  const redis = getRedis();
+  if (!redis) return null; // no Redis credentials available: caller decides how to degrade
+  cachedDailyLimiter = new Ratelimit({
+    redis,
+    limiter: Ratelimit.slidingWindow(100, "24 h"),
+    analytics: true,
+    prefix: "ratelimit:chat:daily",
+  });
+  return cachedDailyLimiter;
+}
+
+/**
+ * Extracts the client IP from the request headers.
+ *
+ * Anti-spoofing (Copilot CR #2):
+ * - `x-real-ip` is read first: Vercel and most CDNs are expected to write only a client IP they
+ *   verified themselves rather than pass through a client-forged value, making it the most trusted.
+ * - `x-forwarded-for` is only a fallback when `x-real-ip` is absent, and its **first** entry is not
+ *   used because the client can forge it freely; the **last non-empty entry** is taken instead,
+ *   i.e. the source address seen by the innermost trusted proxy.
+ * - With neither header (local dev) a fixed string is returned, so all requests share one bucket.
+ */
+function getClientIp(req: Request): string {
+  const xri = req.headers.get("x-real-ip");
+  if (xri && xri.trim()) return xri.trim();
+
+  const xff = req.headers.get("x-forwarded-for");
+  if (xff) {
+    const parts = xff
+      .split(",")
+      .map((ip) => ip.trim())
+      .filter(Boolean);
+    if (parts.length > 0) return parts[parts.length - 1];
+  }
+
+  return "anonymous";
+}
+
+export interface RateLimitResult {
+  success: boolean;
+  limit: number;
+  remaining: number;
+  /** Unix ms timestamp when the window resets */
+  reset: number;
+  /** True when Upstash is not configured; the caller should skip rate limiting */
+  skipped?: boolean;
+}
+
+/**
+ * Applies two rate-limit layers to an AI request: per-minute + per-day; failing either rejects it.
+ * Fails open (`success: true, skipped: true`) when Upstash is not configured or the Redis call
+ * throws, so a limiter misconfiguration or outage weakens protection instead of turning every
+ * request into an unhandled error.
+ * @param req  incoming Request; only its headers are read, to derive the client IP
+ * @param hasImage  whether the messages carry an image (selects the stricter per-minute window)
+ * @returns the rejecting layer's counters when limited, otherwise those of the tighter layer
+ */
+export async function limitChat(
+  req: Request,
+  hasImage = false,
+): Promise<RateLimitResult> {
+  // Fail-open result shared by the "not configured" and "Redis call failed" paths
+  const skipped: RateLimitResult = {
+    success: true,
+    limit: Infinity,
+    remaining: Infinity,
+    reset: 0,
+    skipped: true,
+  };
+
+  const minuteLimiter = hasImage ? getChatImageLimiter() : getChatLimiter();
+  const dayLimiter = getDailyLimiter();
+
+  // Upstash not configured (local dev, or forgotten in production): do not block the request,
+  // just warn once. NODE_ENV is deliberately not consulted so dev also learns it runs unprotected;
+  // the module-level flag keeps the log from being flooded on every request (Copilot CR #3).
+  if (!minuteLimiter || !dayLimiter) {
+    if (!hasWarnedMissingUpstash) {
+      hasWarnedMissingUpstash = true;
+      console.warn(
+        "[rate-limit] UPSTASH_REDIS_REST_URL / UPSTASH_REDIS_REST_TOKEN 未配置，" +
+          "聊天接口暂无限流保护（本实例生命周期内不会再次提示）。" +
+          "生产环境请在 Vercel Env 中补齐。",
+      );
+    }
+    return skipped;
+  }
+
+  try {
+    const ip = getClientIp(req);
+    const [minuteRes, dayRes] = await Promise.all([
+      minuteLimiter.limit(ip),
+      dayLimiter.limit(ip),
+    ]);
+
+    // A rejecting layer takes precedence (minute before day); when both pass,
+    // report the layer with less remaining quota.
+    const rejected = [minuteRes, dayRes].find((res) => !res.success);
+    const tighter =
+      minuteRes.remaining <= dayRes.remaining ? minuteRes : dayRes;
+    const { success, limit, remaining, reset } = rejected ?? tighter;
+    return { success, limit, remaining, reset };
+  } catch (error) {
+    // Bad credentials or a network failure reaching Upstash. At least the suggestions route calls
+    // limitChat outside its try/catch, so rethrowing would fail the request. Log and let it through.
+    console.error(
+      "[rate-limit] Upstash request failed; letting the request through without rate limiting.",
+      error,
+    );
+    return skipped;
+  }
+}
+
+/** Builds the 429 response, including the standard Retry-After header and X-RateLimit-* headers */
+export function rateLimitResponse(result: RateLimitResult): Response {
+  const retryAfterSec = Math.max(
+    1, // never advertise a wait shorter than one second, even if `reset` is already in the past
+    Math.ceil((result.reset - Date.now()) / 1000),
+  );
+  return new Response(
+    JSON.stringify({
+      error: "请求太频繁了，喘口气再来。",
+      code: "rate_limited",
+      retryAfter: retryAfterSec,
+    }),
+    {
+      status: 429,
+      headers: {
+        "Content-Type": "application/json",
+        "Retry-After": String(retryAfterSec),
+        "X-RateLimit-Limit": String(result.limit),
+        "X-RateLimit-Remaining": String(result.remaining),
+        "X-RateLimit-Reset": String(result.reset),
+      },
+    },
+  );
+}
diff --git a/package.json b/package.json
index 1db3c9be..a33981cf 100644
--- a/package.json
+++ b/package.json
@@ -48,6 +48,8 @@
     "@sentry/nextjs": "^10.49.0",
     "@types/mdx": "^2.0.13",
     "@types/pg": "^8.16.0",
+    "@upstash/ratelimit": "^2.0.8",
+    "@upstash/redis": "^1.37.0",
     "@vercel/speed-insights": "^1.2.0",
     "ai": "^6.0.148",
     "antd": "^5.27.4",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 4201ce7d..c9ea7816 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -93,6 +93,12 @@ importers:
       '@types/pg':
         specifier: ^8.16.0
         version: 8.20.0
+      '@upstash/ratelimit':
+        specifier: ^2.0.8
+        version: 2.0.8(@upstash/redis@1.37.0)
+      '@upstash/redis':
+        specifier: ^1.37.0
+        version: 1.37.0
       '@vercel/speed-insights':
         specifier: ^1.2.0
         version: 1.3.1(next@16.2.3(@babel/core@7.29.0)(@opentelemetry/api@1.9.1)(react-dom@19.2.3(react@19.2.3))(react@19.2.3))(react@19.2.3)(vue@3.5.32(typescript@5.9.3))
@@ -4024,6 +4030,18 @@ packages:
     cpu: [x64]
     os: [win32]
 
+  '@upstash/core-analytics@0.0.10':
+    resolution: {integrity: sha512-7qJHGxpQgQr9/vmeS1PktEwvNAF7TI4iJDi8Pu2CFZ9YUGHZH4fOP5TfYlZ4aVxfopnELiE4BS4FBjyK7V1/xQ==}
+    engines: {node: '>=16.0.0'}
+
+  '@upstash/ratelimit@2.0.8':
+    resolution: {integrity: sha512-YSTMBJ1YIxsoPkUMX/P4DDks/xV5YYCswWMamU8ZIfK9ly6ppjRnVOyBhMDXBmzjODm4UQKcxsJPvaeFAijp5w==}
+    peerDependencies:
+      '@upstash/redis': ^1.34.3
+
+  '@upstash/redis@1.37.0':
+    resolution: {integrity: sha512-LqOJ3+XWPLSZ2rGSed5DYG3ixybxb8EhZu3yQqF7MdZX1wLBG/FRcI6xcUZXHy/SS7mmXWyadrud0HJHkOc+uw==}
+
   '@vercel/backends@0.0.14':
     resolution: {integrity: sha512-4a4LQueJCvwqJhz+B9DBlEOZOdyl+BrIMkC1LZC3++YGbEA9KLhcBwS10WF7hndQR1jizpf7klMQbcU2FwaN/g==}
 
@@ -7855,6 +7873,9 @@ packages:
     resolution: {integrity: sha512-nWJ91DjeOkej/TA8pXQ3myruKpKEYgqvpw9lz4OPHj/NWFNluYrjbz9j01CJ8yKQd2g4jFoOkINCTW2I5LEEyw==}
     engines: {node: '>= 0.4'}
 
+  uncrypto@0.1.3:
+    resolution: {integrity: sha512-Ql87qFHB3s/De2ClA9e0gsnS6zXG27SkTiSJwjCc9MebbfapQfuPzumMIUMi38ezPZVNFcHI9sUIepeQfw8J8Q==}
+
   undici-types@7.19.2:
     resolution: {integrity: sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==}
 
@@ -12684,6 +12705,19 @@ snapshots:
   '@unrs/resolver-binding-win32-x64-msvc@1.11.1':
     optional: true
 
+  '@upstash/core-analytics@0.0.10':
+    dependencies:
+      '@upstash/redis': 1.37.0
+
+  '@upstash/ratelimit@2.0.8(@upstash/redis@1.37.0)':
+    dependencies:
+      '@upstash/core-analytics': 0.0.10
+      '@upstash/redis': 1.37.0
+
+  '@upstash/redis@1.37.0':
+    dependencies:
+      uncrypto: 0.1.3
+
   '@vercel/backends@0.0.14(@emnapi/core@1.9.2)(@emnapi/runtime@1.9.2)(rollup@4.60.1)(typescript@5.9.3)':
     dependencies:
       '@vercel/cervel': 0.0.6(@emnapi/core@1.9.2)(@emnapi/runtime@1.9.2)(typescript@5.9.3)
@@ -17643,6 +17677,8 @@ snapshots:
       has-symbols: 1.1.0
       which-boxed-primitive: 1.1.1
 
+  uncrypto@0.1.3: {}
+
   undici-types@7.19.2: {}
 
   undici@5.28.4: